diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..61e064a6473eb96ad34023f83892f84de61fb3d1 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,31 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +assets/network.png filter=lfs diff=lfs merge=lfs -text +assets/Pipeline.png filter=lfs diff=lfs merge=lfs -text +real_manga/class1/Color[[:space:]]1659315.jpg filter=lfs diff=lfs merge=lfs -text +real_manga/class1/Color[[:space:]]3223141571376159.jpg filter=lfs diff=lfs merge=lfs -text +real_manga/class1/Color[[:space:]]3486521.jpg filter=lfs diff=lfs merge=lfs -text +real_manga/class1/Color[[:space:]]5102676.jpg filter=lfs diff=lfs merge=lfs -text +real_manga/class1/Color[[:space:]]5570824.jpg filter=lfs diff=lfs merge=lfs -text +real_manga/class1/Color[[:space:]]5674950.jpg filter=lfs diff=lfs merge=lfs -text +real_manga/class1/Color[[:space:]]5828407151952509.jpg filter=lfs diff=lfs merge=lfs -text +real_manga/class1/Color[[:space:]]5851155317235124.jpg filter=lfs diff=lfs merge=lfs -text +real_manga/class1/Color[[:space:]]6429789966786911.jpg filter=lfs diff=lfs merge=lfs -text +real_manga/class1/Color[[:space:]]6813581942189493.jpg filter=lfs diff=lfs merge=lfs -text +real_manga/class1/Color[[:space:]]8096755.jpg filter=lfs diff=lfs merge=lfs -text +test_datasets/gray_test/001_in.png filter=lfs diff=lfs merge=lfs -text +test_datasets/gray_test/002_in_ref_b.jpeg filter=lfs diff=lfs merge=lfs -text +test_datasets/gray_test/004_in.png filter=lfs diff=lfs merge=lfs -text +test_datasets/gray_test/005_in.png filter=lfs diff=lfs merge=lfs -text +test_datasets/gray_test/006_in.png filter=lfs diff=lfs merge=lfs -text +test_datasets/gray_test/006_ref.png filter=lfs diff=lfs merge=lfs -text +test_datasets/gray_test/out/001_in_color_a.png filter=lfs diff=lfs merge=lfs -text 
+test_datasets/gray_test/out/001_in_color_b.png filter=lfs diff=lfs merge=lfs -text +test_datasets/gray_test/out/002_in_color_a.png filter=lfs diff=lfs merge=lfs -text +test_datasets/gray_test/out/002_in_color_b.png filter=lfs diff=lfs merge=lfs -text +test_datasets/gray_test/out/003_in_color_a.png filter=lfs diff=lfs merge=lfs -text +test_datasets/gray_test/out/003_in_color_b.png filter=lfs diff=lfs merge=lfs -text +test_datasets/gray_test/out/004_in_color.png filter=lfs diff=lfs merge=lfs -text +test_datasets/gray_test/out/005_in_color.png filter=lfs diff=lfs merge=lfs -text +test_datasets/gray_test/out/006_in_color.png filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..88686fee742daa3cc03cda5bb17d99abd24066b6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,24 @@ +### Example user template template +### Example user template + +# IntelliJ project files +.idea +*.iml +out +gen + +# Debug file +datacheck.py +test_gray2color.py +val.py + +experiments/ +misc/ +results/ +test_datasets/* +!/test_datasets/gray_test +!/test_datasets/gray_test/out +!/test_datasets/sketch_test +!/test_datasets/sketch_test/out +train_datasets/ +training_logs/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0dae0d83673e808026772999ddb894c547d6ddfe --- /dev/null +++ b/README.md @@ -0,0 +1,262 @@ +# Reference-Image-Embed-Manga-Colorization + +An amazing manga colorization project + +You can colorize gray manga or character sketches using any reference image you want, this model will faithfully retain the color features and transfer them to your manga. This is useful when you wish the color of the character's hair or clothes to be consistent. + +If the project is helpful, please leave a ⭐ this repo. best luck, my friend 😊
+ +## Overview +

+ +

+ +It's basically a cGAN (Conditional Generative Adversarial Network) architecture. + +### Generator + +The generator is divided into two parts. + +The `Color Embedding Layer` consists of part of a pretrained VGG19 net and an MLP (Multilayer Perceptron), and is used to extract the `color embedding` from the reference image (for training, this is the preprocessed ground-truth image). + +The other part is a U-net-like network. The encoder layer extracts the `content embedding` from the gray input image (which only contains L-channel information), and the decoder layer reconstructs the image with the `color embedding` through PFFB (Progressive Feature Formalization Block) and outputs the ab-channel information. + +

+ +

+ +The figure shows how PFFB works. + +It generates a filter from the color embedding and then convolves it with the content features. The figure is from this [paper](https://arxiv.org/abs/2106.08017); check it for more details. + +### Discriminator + +The discriminator is a PatchGAN, following [pix2pix](https://arxiv.org/abs/1611.07004v3). The difference is that two conditions are used as input: one is the gray image waiting for colorization, and the other is the reference image providing color information. + +### Loss + +There are three losses in total: `L1 loss`, `perceptual loss` produced by a pretrained VGG19, and `adversarial loss` produced by the discriminator. The ratio is `1: 0.1: 0.01`. + +### Pipeline + +

+ +

+ +- a. Segment panels from input manga image, `Manga-Panel-Extractor` is from [here](https://github.com/pvnieo/Manga-Panel-Extractor). +- b. Select a reference image for each panel, and generator will colorize each panel. +- c. Concatenate all colorized panels into original format. + +## Results +### Gray model + +| Original | Reference | Colorization | +|:----------:|:-----------:|:----------:| +| | | | +| | | | +| | | | +| | | | +| | | | +| | | | +| || | +| | | | +| | | | + +### sketch model + +| Original | Reference | Colorization | +| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | +| | | | +| | | | + + + +## Dependencies and Installation + +1. Clone this GitHub repo. + ``` + git clone https://github.com/linSensiGit/Example_Based_Manga_Colorization---cGAN.git + + cd Example_Based_Manga_Colorization---cGAN + ``` + +2. Create Environment + - Python >= 3.6 (Recommend to use [Anaconda](https://www.anaconda.com/download/#linux)) + + - [PyTorch >= 1.5.0](https://pytorch.org/) (Default GPU mode) + + ``` + # My environment for reference + - Python = 3.9.15 + - PyTorch = 1.13.0 + - Torchvision = 0.14.0 + - Cuda = 11.7 + - GPU = RTX 3060ti + ``` + +3. Install Dependencies + + ``` + pip3 install -r requirement.txt + ``` + +## Get Started + +Once you've set up the environment, several things need to be done before colorization. + +### Prepare pretrained models + +1. Download generator. I have trained two generators, for [gray manga](https://drive.google.com/file/d/11RQGvBKySEtRcBdYD8O5ZLb54jB7SAgN/view?usp=drive_link) colorization and [sketch](https://drive.google.com/file/d/1I4XwOYIGAoQwMOicknZl0s6AWcwpARmR/view?usp=drive_link) colorization. Choose what you need. + +2. Download [VGG model](https://drive.google.com/file/d/1S7t3mD-tznEUrMmq5bRsLZk4fkN24QSV/view?usp=drive_link) , it's part of generator. + +3. 
Download the discriminator, for training [gray manga](https://drive.google.com/file/d/1DHHE9um_xOm0brTpbHb_R7K7J4mn37FS/view?usp=drive_link) colorization and [sketch](https://drive.google.com/file/d/1WgIPYY4b4GcpHW9EWFrFoTxL9SlilQbN/view?usp=drive_link) colorization. (optional) + +4. Put the pretrained models in the correct directory: + + ``` + Colorful-Manga-GAN + |- experiments + |- Color2Manga_gray + |- xxx000_gray.pt + |- Color2Manga_sketch + |- xxx000_sketch.pt + |- Discriminator + |- xxx000_d.pt + |- VGG19 + |- vgg19-dcbb9e9d.pth + ``` + +### Quick test + +I have collected some test datasets which contain manga pages and corresponding reference images. You can find them in the path `./test_datasets`. When you use the file `inference.py` to test, you may need to edit the input file path or pretrained weights path in this file. + +``` +python inference.py + +# If you don't want to segment your manga +python inference.py -ne +``` +Initially, `Manga-Panel-Extractor` will segment the manga page into panels. + +Then follow the instructions in the console and you will get the colorized image. + +## Train your Own Model +### Prepare Datasets + +There are three datasets I used to train the model. + +For the gray model, [Anime Face Dataset](https://www.kaggle.com/datasets/scribbless/another-anime-face-dataset) and Tagged [Anime Illustrations Dataset](https://www.kaggle.com/datasets/mylesoneill/tagged-anime-illustrations) are used. I only use the `danbooru-images` folder in the second dataset. + +For the sketch model, [Anime Sketch Colorization Pair Dataset](https://www.kaggle.com/datasets/ktaebum/anime-sketch-colorization-pair) is used. + +All the datasets are from [Kaggle](https://www.kaggle.com/). + +The following instructions are based on my dataset, but feel free to use your own dataset if you like. 
+ +### Preprocess training data + +``` +cd data +python prepare_data.py +``` + +If you are using ` Anime Sketch Colorization Pair` dataset : + +``` +python prepare_data_sketch.py +``` + +Several arguments needed to be assigned : + +``` +usage: prepare_data.py [-h] [--out OUT] [--size SIZE] [--n_worker N_WORKER] + [--resample RESAMPLE] + path +positional arguments: + path the path of datasets +optional arguments: + -h, --help show this help message and exit + --out OUT the path to save generated lmdb + --size SIZE compressed image size (128, 256, 512, 1024) alternative + --n_worker N_WORKER The number of threads, depends on your CPU + --resample RESAMPLE +``` + +For instance, you can run the command like this: + +``` +python prepare_data.py --out ../train_datasets/Sketch_train_lmdb --n_worker 20 --size 256 E:/Dataset/animefaces256cleaner +``` + +### Training + +There are four scripts in total for training + +`train.py` —— train only generator + +`train_disc` —— train only discriminator + +`train_all_gray.py`—— train both generator and discriminator, under the usual dataset + +`train_all_sketch.py`—— train both generator and discriminator, under sketch pair dataset specific + + + +All of these scripts share similar commands to drive: + +``` +usage: train_all_gray.py [-h] [--datasets DATASETS] [--iter ITER] + [--batch BATCH] [--size SIZE] [--ckpt CKPT] + [--ckpt_disc CKPT_DISC] [--lr LR] [--lr_disc LR_DISC] + [--experiment_name EXPERIMENT_NAME] [--wandb] + [--local_rank LOCAL_RANK] +optional arguments: + -h, --help show this help message and exit + --datasets DATASETS the path of training dataset + --iter ITER number of iteration in total + --batch BATCH batch size + --size SIZE size of image in dataset, usually 256 + --ckpt CKPT path of pretrained generator + --ckpt_disc CKPT_DISC path of pretrained discriminator + --lr LR learning rate of generator + --lr_disc LR_DISC learning rate of discriminator + --experiment_name EXPERIMENT_NAME used to save training_logs and 
trained model + --wandb + --local_rank LOCAL_RANK +``` + +There may be slight differences between the scripts; check the code for more details. + + + +For instance, you can run the command like this: + +``` +python train_all_gray.py --batch 8 --experiment_name Color2Manga_sketch --ckpt experiments/Color2Manga_sketch/078000.pt --datasets ./train_datasets/Sketch_train_lmdb --ckpt_disc experiments/Discriminator/078000_d.pt +``` + +## Work in Progress +- [ ] Use an SR model instead of direct interpolation upscaling +- [ ] Optimize the generator network (adding L-channel information to the output, which is essential for colorizing sketches) +- [ ] A better manga-panel-extractor (the current segmentation is not precise enough) +- [ ] Develop a front-end UI and add color hints so that users can adjust the color of a specific area + +## 😁Contact + +If you have any questions, please feel free to contact me via `shizifeng0615@outlook.com` + +## 🙌 Acknowledgement +Based on https://github.com/zhaohengyuan1/Color2Embed + +Thx https://github.com/pvnieo/Manga-Panel-Extractor + +## Reference + +[1] Zhao, Hengyuan et al. “Color2Embed: Fast Exemplar-Based Image Colorization using Color Embeddings.” (2021). + +[2] Isola, Phillip et al. “Image-to-Image Translation with Conditional Adversarial Networks.” *2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)* (2016): 5967-5976. + +[3] Furusawa, Chie et al. “Comicolorization: semi-automatic manga colorization.” *SIGGRAPH Asia 2017 Technical Briefs* (2017): n. pag. + +[4] Satoshi Iizuka, Edgar Simo-Serra, and Hiroshi Ishikawa. "Let there be Color!: Joint End-to-end Learning of Global and Local Image Priors for Automatic Image Colorization with Simultaneous Classification". ACM Transactions on Graphics (Proc. of SIGGRAPH), 35(4):110, 2016. 
diff --git a/__pycache__/distributed.cpython-310.pyc b/__pycache__/distributed.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..55cba74d2825da7b4d5885a47592be06811a3972 Binary files /dev/null and b/__pycache__/distributed.cpython-310.pyc differ diff --git a/__pycache__/models.cpython-310.pyc b/__pycache__/models.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ee9f5a2b85ea53b3da24f19306a52f0c80631cda Binary files /dev/null and b/__pycache__/models.cpython-310.pyc differ diff --git a/__pycache__/models.cpython-38.pyc b/__pycache__/models.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f73e5d75da255c994fc860406b2df9547c15d667 Binary files /dev/null and b/__pycache__/models.cpython-38.pyc differ diff --git a/__pycache__/utils.cpython-310.pyc b/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2b289439289388a9f86abc8773c9722f376dd0b0 Binary files /dev/null and b/__pycache__/utils.cpython-310.pyc differ diff --git a/__pycache__/utils.cpython-38.pyc b/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1c4640681e88be98ba8d3c4502fdcf966b2fe0d3 Binary files /dev/null and b/__pycache__/utils.cpython-38.pyc differ diff --git a/__pycache__/vgg_model.cpython-310.pyc b/__pycache__/vgg_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6411e374a93cfde32e0b8f91c0e1867a3c4c2f29 Binary files /dev/null and b/__pycache__/vgg_model.cpython-310.pyc differ diff --git a/__pycache__/vgg_model.cpython-38.pyc b/__pycache__/vgg_model.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d1633200a8b60bdde11b012fef9b7544e2d0aec Binary files /dev/null and b/__pycache__/vgg_model.cpython-38.pyc differ diff --git a/assets/PFFB.png b/assets/PFFB.png new file mode 100644 index 
def RGB2Lab(inputs):
    """Convert an RGB image array to Lab by delegating to ``color.rgb2lab``."""
    return color.rgb2lab(inputs)


def Normalize(inputs):
    """Shift the first channel of a channel-last Lab array by -50.

    Args:
        inputs: array indexed as ``[row, col, channel]`` with at least three
            channels; channel 0 is treated as L, channels 1-2 as ab.

    Returns:
        A new ``float32`` array with channel 0 reduced by 50 and channels
        1-2 copied unchanged (ab is intentionally left unscaled).
    """
    lightness = inputs[:, :, 0:1] - 50
    chroma = inputs[:, :, 1:3]
    return np.concatenate((lightness, chroma), 2).astype('float32')


def selfnormalize(inputs):
    """Scale a tensor by the reciprocal of its value range (max - min).

    The values are only scaled, never shifted, so the result is not forced
    into ``[0, 1]``.

    A constant tensor has a range of zero; dividing by it would fill the
    result with inf/NaN. In that case the scale falls back to 1 and the
    values are returned unscaled.

    Args:
        inputs: a non-empty ``torch.Tensor``.

    Returns:
        A new tensor equal to ``inputs / (inputs.max() - inputs.min())``.
    """
    span = torch.max(inputs) - torch.min(inputs)
    # Guard the degenerate zero-range case without leaving tensor land.
    span = torch.where(span == 0, torch.ones_like(span), span)
    return inputs / span


def to_gray(inputs):
    """Build a 3-channel uint8 gray image from the first channel of ``inputs``.

    The first channel is mapped with ``(value + 50) / 100 * 255`` (so -50
    maps to 0 and 50 maps to 255), clipped to ``[0, 255]`` and replicated
    three times along the channel axis.

    Args:
        inputs: array indexed as ``[row, col, channel]``.

    Returns:
        ``uint8`` array of shape ``(rows, cols, 3)``.
    """
    lightness = inputs[:, :, :1]
    stacked = np.concatenate((lightness, lightness, lightness), 2)
    return np.clip((stacked + 50) / 100 * 255, 0, 255).astype('uint8')


def numpy2tensor(inputs):
    """Turn an ``(H, W, C)`` array into a ``(C, H, W)`` tensor.

    The tensor is created with ``torch.from_numpy`` on the transposed view,
    so it shares memory with ``inputs``.
    """
    return torch.from_numpy(inputs.transpose(2, 0, 1))
class MultiResolutionDataset(Dataset):
    """LMDB-backed dataset of side-by-side (color | sketch) image pairs.

    Each record is one encoded image whose left half is the color picture
    and whose right half is the matching sketch. Records are stored under
    keys of the form ``'<resolution>-<zero-padded index>'`` and the number
    of records under the key ``'length'``.

    Args:
        path: directory of the LMDB environment.
        transform: callable applied to the reference image (a PIL image)
            after noise and TPS warping.
        resolution: key prefix selecting which stored resolution to read.

    Raises:
        IOError: if the environment cannot be opened or has no ``length``
            record.
    """

    def __init__(self, path, transform, resolution=256):
        self.env = lmdb.open(
            path,
            max_readers=32,
            readonly=True,
            lock=False,
            readahead=False,
            meminit=False,
        )

        if not self.env:
            raise IOError('Cannot open lmdb dataset', path)

        with self.env.begin(write=False) as txn:
            raw_length = txn.get('length'.encode('utf-8'))

        # txn.get returns None for a missing key; fail with a clear message
        # instead of an AttributeError on None.decode.
        if raw_length is None:
            raise IOError('lmdb dataset has no "length" record', path)
        self.length = int(raw_length.decode('utf-8'))

        self.resolution = resolution
        self.transform = transform

    def __len__(self):
        """Return the number of records announced by the ``length`` key."""
        return self.length

    def __getitem__(self, index):
        """Load one training sample.

        Returns:
            ``(img, img_ref, img_lab, img_lab_sketch)`` as tensors produced
            by ``numpy2tensor``:
            ``img`` is the float32 RGB color half, ``img_ref`` the noised,
            TPS-warped and transformed color half, ``img_lab`` the
            normalized Lab version of the color half, and
            ``img_lab_sketch`` the normalized Lab version of the sketch half.

        Raises:
            IndexError: if no record exists for ``index``.
        """
        key = f'{self.resolution}-{str(index).zfill(5)}'.encode('utf-8')
        with self.env.begin(write=False) as txn:
            img_bytes = txn.get(key)

        if img_bytes is None:
            raise IndexError(f'no lmdb record for key {key!r}')

        with Image.open(BytesIO(img_bytes)) as img:
            img_src = np.array(img)  # [0,255] uint8, color | sketch side by side

        # Split at the middle column (256 for the 256x512 records written by
        # prepare_data_sketch.py) rather than hard-coding the width.
        half = img_src.shape[1] // 2
        color_half = img_src[:, :half]
        sketch_half = img_src[:, half:]

        # Perturb the reference with uniform noise in [-5, 5) so it is not a
        # pixel-exact copy of the target.
        noise = np.random.uniform(-5, 5, np.shape(color_half))
        img_ref = np.clip(np.array(color_half) + noise, 0, 255)

        # NOTE(review): tps_transform is defined elsewhere; presumably a
        # thin-plate-spline warp returning an image-shaped array - confirm.
        img_ref = tps_transform(img_ref)
        img_ref = np.clip(img_ref, 0, 255)
        img_ref = img_ref.astype('uint8')
        img_ref = Image.fromarray(img_ref)
        img_ref = np.array(self.transform(img_ref))

        img_lab = Normalize(RGB2Lab(color_half))  # l [-50,50] ab [-128, 128]
        img_lab_sketch = Normalize(RGB2Lab(sketch_half))  # l [-50,50] ab [-128, 128]

        img = color_half.astype('float32')  # [0,255] float32 RGB
        img_ref = img_ref.astype('float32')  # [0,255] float32 RGB

        img = numpy2tensor(img)
        img_ref = numpy2tensor(img_ref)
        img_lab = numpy2tensor(img_lab)
        img_lab_sketch = numpy2tensor(img_lab_sketch)

        return img, img_ref, img_lab, img_lab_sketch
quality=100): + img = trans_fn.resize(img, size, resample) + img = trans_fn.center_crop(img, size) + buffer = BytesIO() + img.save(buffer, format='jpeg', quality=quality) + val = buffer.getvalue() + + return val + + +def resize_multiple(img, sizes=(128, 256, 512, 1024), resample=Image.LANCZOS, quality=100): + imgs = [] + + for size in sizes: + imgs.append(resize_and_convert(img, size, resample, quality)) + + return imgs + + +def resize_worker(img_file, sizes, resample): + i, file = img_file + img = Image.open(file) + img = img.convert('RGB') + out = resize_multiple(img, sizes=sizes, resample=resample) + + return i, out + + +def prepare(env, dataset, n_worker, sizes=(128, 256, 512, 1024), resample=Image.LANCZOS): + resize_fn = partial(resize_worker, sizes=sizes, resample=resample) + + files = sorted(dataset.imgs, key=lambda x: x[0]) + # print(files) + # eixt() + files = [(i, file) for i, (file, label) in enumerate(files)] + total = 0 + + with multiprocessing.Pool(n_worker) as pool: + for i, imgs in tqdm(pool.imap_unordered(resize_fn, files)): + for size, img in zip(sizes, imgs): + key = f'{size}-{str(i).zfill(5)}'.encode('utf-8') + + with env.begin(write=True) as txn: + txn.put(key, img) + + total += 1 + + with env.begin(write=True) as txn: + txn.put('length'.encode('utf-8'), str(total).encode('utf-8')) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--out', type=str) + parser.add_argument('--size', type=str, default='128,256,512,1024') + parser.add_argument('--n_worker', type=int, default=8) + parser.add_argument('--resample', type=str, default='lanczos') + parser.add_argument('path', type=str) + + args = parser.parse_args() + + resample_map = {'lanczos': Image.LANCZOS, 'bilinear': Image.BILINEAR} + resample = resample_map[args.resample] + + sizes = [int(s.strip()) for s in args.size.split(',')] + + print(f'Make dataset of image sizes:', ', '.join(str(s) for s in sizes)) + + imgset = datasets.ImageFolder(args.path) + + 
with lmdb.open(args.out, map_size=6 * 1024 * 1024 * 1024, readahead=False) as env: + prepare(env, imgset, args.n_worker, sizes=sizes, resample=resample) diff --git a/data/prepare_data_sketch.py b/data/prepare_data_sketch.py new file mode 100644 index 0000000000000000000000000000000000000000..108934ca7e40f64106f5124db4a0f5ccb7eb97c0 --- /dev/null +++ b/data/prepare_data_sketch.py @@ -0,0 +1,84 @@ +import argparse +from io import BytesIO +import multiprocessing +from functools import partial + +from PIL import Image +import lmdb +from tqdm import tqdm +from torchvision import datasets +from torchvision.transforms import functional as trans_fn + + +def resize_and_convert(img, size, resample, quality=100): + img = trans_fn.resize(img, size=[256, 512], interpolation=resample) + img = trans_fn.center_crop(img, output_size=[256, 512]) + buffer = BytesIO() + img.save(buffer, format='jpeg', quality=quality) + val = buffer.getvalue() + + return val + + +def resize_multiple(img, sizes=(128, 256, 512, 1024), resample=Image.LANCZOS, quality=100): + imgs = [] + + for size in sizes: + imgs.append(resize_and_convert(img, size, resample, quality)) + + return imgs + + +def resize_worker(img_file, sizes, resample): + i, file = img_file + img = Image.open(file) + img = img.convert('RGB') + out = resize_multiple(img, sizes=sizes, resample=resample) + + return i, out + + +def prepare(env, dataset, n_worker, sizes=(128, 256, 512, 1024), resample=Image.LANCZOS): + resize_fn = partial(resize_worker, sizes=sizes, resample=resample) + + files = sorted(dataset.imgs, key=lambda x: x[0]) + # print(files) + # eixt() + files = [(i, file) for i, (file, label) in enumerate(files)] + total = 0 + + with multiprocessing.Pool(n_worker) as pool: + for i, imgs in tqdm(pool.imap_unordered(resize_fn, files)): + for size, img in zip(sizes, imgs): + key = f'{size}-{str(i).zfill(5)}'.encode('utf-8') + + with env.begin(write=True) as txn: + txn.put(key, img) + + total += 1 + + with env.begin(write=True) as 
txn: + txn.put('length'.encode('utf-8'), str(total).encode('utf-8')) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--out', type=str) + parser.add_argument('--size', type=str, default='128,256,512,1024') + parser.add_argument('--n_worker', type=int, default=8) + parser.add_argument('--resample', type=str, default='lanczos') + parser.add_argument('path', type=str) + + args = parser.parse_args() + + resample_map = {'lanczos': Image.LANCZOS, 'bilinear': Image.BILINEAR} + resample = resample_map[args.resample] + + sizes = [int(s.strip()) for s in args.size.split(',')] + + print(f'Make dataset of image sizes:', ', '.join(str(s) for s in sizes)) + + imgset = datasets.ImageFolder(args.path) + + with lmdb.open(args.out, map_size=6 * 1024 * 1024 * 1024, readahead=False) as env: + prepare(env, imgset, args.n_worker, sizes=sizes, resample=resample) diff --git a/data/thinplate/__init__.py b/data/thinplate/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e83a4bc7929a0f2449f04e0c8d70e56fcd4be22e --- /dev/null +++ b/data/thinplate/__init__.py @@ -0,0 +1,9 @@ +from data.thinplate.numpy import * + +try: + import torch + import data.thinplate.pytorch as torch +except ImportError: + pass + +__version__ = '1.0.0' \ No newline at end of file diff --git a/data/thinplate/__pycache__/__init__.cpython-310.pyc b/data/thinplate/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0aa67a7038800d8bdf381c1bba7e2655853ad00b Binary files /dev/null and b/data/thinplate/__pycache__/__init__.cpython-310.pyc differ diff --git a/data/thinplate/__pycache__/numpy.cpython-310.pyc b/data/thinplate/__pycache__/numpy.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7bfbe21351f6980ac9199b7961c832dc2a0fe9d7 Binary files /dev/null and b/data/thinplate/__pycache__/numpy.cpython-310.pyc differ diff --git 
a/data/thinplate/__pycache__/pytorch.cpython-310.pyc b/data/thinplate/__pycache__/pytorch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16a43c85345220f4fcd992a02bdb27f8f8b90a92 Binary files /dev/null and b/data/thinplate/__pycache__/pytorch.cpython-310.pyc differ diff --git a/data/thinplate/numpy.py b/data/thinplate/numpy.py new file mode 100644 index 0000000000000000000000000000000000000000..9f103110778645b2eff29789beeb80b008452f06 --- /dev/null +++ b/data/thinplate/numpy.py @@ -0,0 +1,115 @@ +# Copyright 2018 Christoph Heindl. +# +# Licensed under MIT License +# ============================================================ + +import numpy as np + +class TPS: + @staticmethod + def fit(c, lambd=0., reduced=False): + n = c.shape[0] + + U = TPS.u(TPS.d(c, c)) + K = U + np.eye(n, dtype=np.float32)*lambd + + P = np.ones((n, 3), dtype=np.float32) + P[:, 1:] = c[:, :2] + + v = np.zeros(n+3, dtype=np.float32) + v[:n] = c[:, -1] + + A = np.zeros((n+3, n+3), dtype=np.float32) + A[:n, :n] = K + A[:n, -3:] = P + A[-3:, :n] = P.T + + theta = np.linalg.solve(A, v) # p has structure w,a + return theta[1:] if reduced else theta + + @staticmethod + def d(a, b): + return np.sqrt(np.square(a[:, None, :2] - b[None, :, :2]).sum(-1)) + + @staticmethod + def u(r): + return r**2 * np.log(r + 1e-6) + + @staticmethod + def z(x, c, theta): + x = np.atleast_2d(x) + U = TPS.u(TPS.d(x, c)) + w, a = theta[:-3], theta[-3:] + reduced = theta.shape[0] == c.shape[0] + 2 + if reduced: + w = np.concatenate((-np.sum(w, keepdims=True), w)) + b = np.dot(U, w) + return a[0] + a[1]*x[:, 0] + a[2]*x[:, 1] + b + +def uniform_grid(shape): + '''Uniform grid coordinates. + + Params + ------ + shape : tuple + HxW defining the number of height and width dimension of the grid + + Returns + ------- + points: HxWx2 tensor + Grid coordinates over [0,1] normalized image range. 
+ ''' + + H,W = shape[:2] + c = np.empty((H, W, 2)) + c[..., 0] = np.linspace(0, 1, W, dtype=np.float32) + c[..., 1] = np.expand_dims(np.linspace(0, 1, H, dtype=np.float32), -1) + + return c + +def tps_theta_from_points(c_src, c_dst, reduced=False): + delta = c_src - c_dst + + cx = np.column_stack((c_dst, delta[:, 0])) + cy = np.column_stack((c_dst, delta[:, 1])) + + theta_dx = TPS.fit(cx, reduced=reduced) + theta_dy = TPS.fit(cy, reduced=reduced) + + return np.stack((theta_dx, theta_dy), -1) + + +def tps_grid(theta, c_dst, dshape): + ugrid = uniform_grid(dshape) + + reduced = c_dst.shape[0] + 2 == theta.shape[0] + + dx = TPS.z(ugrid.reshape((-1, 2)), c_dst, theta[:, 0]).reshape(dshape[:2]) + dy = TPS.z(ugrid.reshape((-1, 2)), c_dst, theta[:, 1]).reshape(dshape[:2]) + dgrid = np.stack((dx, dy), -1) + + grid = dgrid + ugrid + + return grid # H'xW'x2 grid[i,j] in range [0..1] + +def tps_grid_to_remap(grid, sshape): + '''Convert a dense grid to OpenCV's remap compatible maps. + + Params + ------ + grid : HxWx2 array + Normalized flow field coordinates as computed by compute_densegrid. + sshape : tuple + Height and width of source image in pixels. + + + Returns + ------- + mapx : HxW array + mapy : HxW array + ''' + + mx = (grid[:, :, 0] * sshape[1]).astype(np.float32) + my = (grid[:, :, 1] * sshape[0]).astype(np.float32) + + return mx, my \ No newline at end of file diff --git a/data/thinplate/pytorch.py b/data/thinplate/pytorch.py new file mode 100644 index 0000000000000000000000000000000000000000..d0538daa74872778f5ce5e476544868e99d965e6 --- /dev/null +++ b/data/thinplate/pytorch.py @@ -0,0 +1,126 @@ +# Copyright 2018 Christoph Heindl. +# +# Licensed under MIT License +# ============================================================ + +import torch + +def tps(theta, ctrl, grid): + '''Evaluate the thin-plate-spline (TPS) surface at xy locations arranged in a grid. + The TPS surface is a minimum bend interpolation surface defined by a set of control points. 
+ The function value for a x,y location is given by + + TPS(x,y) := theta[-3] + theta[-2]*x + theta[-1]*y + \sum_t=0,T theta[t] U(x,y,ctrl[t]) + + This method computes the TPS value for multiple batches over multiple grid locations for 2 + surfaces in one go. + + Params + ------ + theta: Nx(T+3)x2 tensor, or Nx(T+2)x2 tensor + Batch size N, T+3 or T+2 (reduced form) model parameters for T control points in dx and dy. + ctrl: NxTx2 tensor or Tx2 tensor + T control points in normalized image coordinates [0..1] + grid: NxHxWx3 tensor + Grid locations to evaluate with homogeneous 1 in first coordinate. + + Returns + ------- + z: NxHxWx2 tensor + Function values at each grid location in dx and dy. + ''' + + N, H, W, _ = grid.size() + + if ctrl.dim() == 2: + ctrl = ctrl.expand(N, *ctrl.size()) + + T = ctrl.shape[1] + + diff = grid[...,1:].unsqueeze(-2) - ctrl.unsqueeze(1).unsqueeze(1) + D = torch.sqrt((diff**2).sum(-1)) + U = (D**2) * torch.log(D + 1e-6) + + w, a = theta[:, :-3, :], theta[:, -3:, :] + + reduced = T + 2 == theta.shape[1] + if reduced: + w = torch.cat((-w.sum(dim=1, keepdim=True), w), dim=1) + + # U is NxHxWxT + b = torch.bmm(U.view(N, -1, T), w).view(N,H,W,2) + # b is NxHxWx2 + z = torch.bmm(grid.view(N,-1,3), a).view(N,H,W,2) + b + + return z + +def tps_grid(theta, ctrl, size): + '''Compute a thin-plate-spline grid from parameters for sampling. + + Params + ------ + theta: Nx(T+3)x2 tensor + Batch size N, T+3 model parameters for T control points in dx and dy. + ctrl: NxTx2 tensor, or Tx2 tensor + T control points in normalized image coordinates [0..1] + size: tuple + Output grid size as NxCxHxW. C unused. This defines the output image + size when sampling. + + Returns + ------- + grid : NxHxWx2 tensor + Grid suitable for sampling in pytorch containing source image + locations for each output pixel. + ''' + N, _, H, W = size + + grid = theta.new(N, H, W, 3) + grid[:, :, :, 0] = 1. 
+ grid[:, :, :, 1] = torch.linspace(0, 1, W) + grid[:, :, :, 2] = torch.linspace(0, 1, H).unsqueeze(-1) + + z = tps(theta, ctrl, grid) + return (grid[...,1:] + z)*2-1 # [-1,1] range required by F.sample_grid + +def tps_sparse(theta, ctrl, xy): + if xy.dim() == 2: + xy = xy.expand(theta.shape[0], *xy.size()) + + N, M = xy.shape[:2] + grid = xy.new(N, M, 3) + grid[..., 0] = 1. + grid[..., 1:] = xy + + z = tps(theta, ctrl, grid.view(N,M,1,3)) + return xy + z.view(N, M, 2) + +def uniform_grid(shape): + '''Uniformly places control points aranged in grid accross normalized image coordinates. + + Params + ------ + shape : tuple + HxW defining the number of control points in height and width dimension + + Returns + ------- + points: HxWx2 tensor + Control points over [0,1] normalized image range. + ''' + H,W = shape[:2] + c = torch.zeros(H, W, 2) + c[..., 0] = torch.linspace(0, 1, W) + c[..., 1] = torch.linspace(0, 1, H).unsqueeze(-1) + return c + +if __name__ == '__main__': + c = torch.tensor([ + [0., 0], + [1., 0], + [1., 1], + [0, 1], + ]).unsqueeze(0) + theta = torch.zeros(1, 4+3, 2) + size= (1,1,6,3) + print(tps_grid(theta, c, size).shape) \ No newline at end of file diff --git a/data/thinplate/tests/__init__.py b/data/thinplate/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/thinplate/tests/test_tps_numpy.py b/data/thinplate/tests/test_tps_numpy.py new file mode 100644 index 0000000000000000000000000000000000000000..b07cffe3ff81bed7308c7963c1e01a38ac8b6265 --- /dev/null +++ b/data/thinplate/tests/test_tps_numpy.py @@ -0,0 +1,85 @@ + +import numpy as np +from numpy.testing import assert_allclose +import thinplate as tps + +def test_numpy_fit(): + c = np.array([ + [0., 0, 0.0], + [1., 0, 0.0], + [1., 1, 0.0], + [0, 1, 0.0], + ]) + + theta = tps.TPS.fit(c) + assert_allclose(theta, 0) + assert_allclose(tps.TPS.z(c, c, theta), c[:, 2]) + + c = np.array([ + [0., 0, 1.0], + 
[1., 0, 1.0], + [1., 1, 1.0], + [0, 1, 1.0], + ]) + + theta = tps.TPS.fit(c) + assert_allclose(theta[:-3], 0) + assert_allclose(theta[-3:], [1, 0, 0]) + assert_allclose(tps.TPS.z(c, c, theta), c[:, 2], atol=1e-3) + + # reduced form + theta = tps.TPS.fit(c, reduced=True) + assert len(theta) == c.shape[0] + 2 + assert_allclose(theta[:-3], 0) + assert_allclose(theta[-3:], [1, 0, 0]) + assert_allclose(tps.TPS.z(c, c, theta), c[:, 2], atol=1e-3) + + c = np.array([ + [0., 0, -.5], + [1., 0, 0.5], + [1., 1, 0.2], + [0, 1, 0.8], + ]) + + theta = tps.TPS.fit(c) + assert_allclose(tps.TPS.z(c, c, theta), c[:, 2], atol=1e-3) + +def test_numpy_densegrid(): + + # enlarges a small rectangle to full view + + import cv2 + + img = np.zeros((40, 40), dtype=np.uint8) + img[10:21, 10:21] = 255 + + c_dst = np.array([ + [0., 0], + [1., 0], + [1, 1], + [0, 1], + ]) + + + c_src = np.array([ + [10., 10], + [20., 10], + [20, 20], + [10, 20], + ]) / 40. + + theta = tps.tps_theta_from_points(c_src, c_dst) + theta_r = tps.tps_theta_from_points(c_src, c_dst, reduced=True) + + grid = tps.tps_grid(theta, c_dst, (20,20)) + grid_r = tps.tps_grid(theta_r, c_dst, (20,20)) + + mapx, mapy = tps.tps_grid_to_remap(grid, img.shape) + warped = cv2.remap(img, mapx, mapy, cv2.INTER_CUBIC) + + assert img.min() == 0. + assert img.max() == 255. + assert warped.shape == (20,20) + assert warped.min() == 255. + assert warped.max() == 255. 
+ assert np.linalg.norm(grid.reshape(-1,2) - grid_r.reshape(-1,2)) < 1e-3 diff --git a/data/thinplate/tests/test_tps_pytorch.py b/data/thinplate/tests/test_tps_pytorch.py new file mode 100644 index 0000000000000000000000000000000000000000..691836385c67aa077241a690480a7eb94f21055f --- /dev/null +++ b/data/thinplate/tests/test_tps_pytorch.py @@ -0,0 +1,43 @@ +import torch +import torch.optim as optim +import torch.nn.functional as F + +import numpy as np +import thinplate as tps + +from numpy.testing import assert_allclose + +def test_pytorch_grid(): + + c_dst = np.array([ + [0., 0], + [1., 0], + [1, 1], + [0, 1], + ], dtype=np.float32) + + + c_src = np.array([ + [10., 10], + [20., 10], + [20, 20], + [10, 20], + ], dtype=np.float32) / 40. + + theta = tps.tps_theta_from_points(c_src, c_dst) + theta_r = tps.tps_theta_from_points(c_src, c_dst, reduced=True) + + np_grid = tps.tps_grid(theta, c_dst, (20,20)) + np_grid_r = tps.tps_grid(theta_r, c_dst, (20,20)) + + pth_theta = torch.tensor(theta).unsqueeze(0) + pth_grid = tps.torch.tps_grid(pth_theta, torch.tensor(c_dst), (1, 1, 20, 20)).squeeze().numpy() + pth_grid = (pth_grid + 1) / 2 # convert [-1,1] range to [0,1] + + pth_theta_r = torch.tensor(theta_r).unsqueeze(0) + pth_grid_r = tps.torch.tps_grid(pth_theta_r, torch.tensor(c_dst), (1, 1, 20, 20)).squeeze().numpy() + pth_grid_r = (pth_grid_r + 1) / 2 # convert [-1,1] range to [0,1] + + assert_allclose(np_grid, pth_grid) + assert_allclose(np_grid_r, pth_grid_r) + assert_allclose(np_grid_r, np_grid) \ No newline at end of file diff --git a/data/tps_transformation.py b/data/tps_transformation.py new file mode 100644 index 0000000000000000000000000000000000000000..edc189433d744da14c36afac5a723ef7b5459fe5 --- /dev/null +++ b/data/tps_transformation.py @@ -0,0 +1,44 @@ +import numpy as np +import data.thinplate as tps +import cv2 +import random +import math + +# Reference : https://github.com/cheind/py-thin-plate-spline + +def tps_transform(img, dshape=None): + + while True: 
+ point1 = round(random.uniform(0.3, 0.7), 2) + point2 = round(random.uniform(0.3, 0.7), 2) + range_1 = round(random.uniform(-0.25, 0.25), 2) + range_2 = round(random.uniform(-0.25, 0.25), 2) + if math.isclose(point1 + range_1, point2 + range_2): + continue + else: + break + + c_src = np.array([ + [0.0, 0.0], + [1., 0], + [1, 1], + [0, 1], + [point1, point1], + [point2, point2], + ]) + + c_dst = np.array([ + [0., 0], + [1., 0], + [1, 1], + [0, 1], + [point1 + range_1, point1 + range_1], + [point2 + range_2, point2 + range_2], + ]) + + dshape = dshape or img.shape + theta = tps.tps_theta_from_points(c_src, c_dst, reduced=True) + grid = tps.tps_grid(theta, c_dst, dshape) + mapx, mapy = tps.tps_grid_to_remap(grid, img.shape) + return cv2.remap(img, mapx, mapy, cv2.INTER_CUBIC) + diff --git a/discriminator.py b/discriminator.py new file mode 100644 index 0000000000000000000000000000000000000000..86445dfe8fdf207a352b5046862f6364e8f627a2 --- /dev/null +++ b/discriminator.py @@ -0,0 +1,31 @@ +import torch.nn as nn +import torch.nn.functional as F +import torch + + +class Discriminator(nn.Module): + def __init__(self, in_channels=3): + super(Discriminator, self).__init__() + + def discriminator_block(in_filters, out_filters, normalization=True): + """Returns downsampling layers of each discriminator block""" + layers = [nn.Conv2d(in_filters, out_filters, 4, stride=2, padding=1)] + if normalization: + layers.append(nn.InstanceNorm2d(out_filters)) + layers.append(nn.LeakyReLU(0.2, inplace=True)) + return layers + + self.model = nn.Sequential( + *discriminator_block(in_channels * 3, 64, normalization=False), + *discriminator_block(64, 128), + *discriminator_block(128, 256), + *discriminator_block(256, 512), + nn.ZeroPad2d((1, 0, 1, 0)), + nn.Conv2d(512, 1, 4, padding=1, bias=False) + ) + + def forward(self, img_out, img_l, img_ref ): + # Concatenate image and condition image by channels to produce input + img_input = torch.cat((img_out, img_l, img_ref), 1) + return 
self.model(img_input) + diff --git a/distributed.py b/distributed.py new file mode 100644 index 0000000000000000000000000000000000000000..51fa243257ef302e2015d5ff36ac531b86a9a0ce --- /dev/null +++ b/distributed.py @@ -0,0 +1,126 @@ +import math +import pickle + +import torch +from torch import distributed as dist +from torch.utils.data.sampler import Sampler + + +def get_rank(): + if not dist.is_available(): + return 0 + + if not dist.is_initialized(): + return 0 + + return dist.get_rank() + + +def synchronize(): + if not dist.is_available(): + return + + if not dist.is_initialized(): + return + + world_size = dist.get_world_size() + + if world_size == 1: + return + + dist.barrier() + + +def get_world_size(): + if not dist.is_available(): + return 1 + + if not dist.is_initialized(): + return 1 + + return dist.get_world_size() + + +def reduce_sum(tensor): + if not dist.is_available(): + return tensor + + if not dist.is_initialized(): + return tensor + + tensor = tensor.clone() + dist.all_reduce(tensor, op=dist.ReduceOp.SUM) + + return tensor + + +def gather_grad(params): + world_size = get_world_size() + + if world_size == 1: + return + + for param in params: + if param.grad is not None: + dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM) + param.grad.data.div_(world_size) + + +def all_gather(data): + world_size = get_world_size() + + if world_size == 1: + return [data] + + buffer = pickle.dumps(data) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to('cuda') + + local_size = torch.IntTensor([tensor.numel()]).to('cuda') + size_list = [torch.IntTensor([0]).to('cuda') for _ in range(world_size)] + dist.all_gather(size_list, local_size) + size_list = [int(size.item()) for size in size_list] + max_size = max(size_list) + + tensor_list = [] + for _ in size_list: + tensor_list.append(torch.ByteTensor(size=(max_size,)).to('cuda')) + + if local_size != max_size: + padding = torch.ByteTensor(size=(max_size - 
local_size,)).to('cuda') + tensor = torch.cat((tensor, padding), 0) + + dist.all_gather(tensor_list, tensor) + + data_list = [] + + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def reduce_loss_dict(loss_dict): + world_size = get_world_size() + + if world_size < 2: + return loss_dict + + with torch.no_grad(): + keys = [] + losses = [] + + for k in sorted(loss_dict.keys()): + keys.append(k) + losses.append(loss_dict[k]) + + losses = torch.stack(losses, 0) + dist.reduce(losses, dst=0) + + if dist.get_rank() == 0: + losses /= world_size + + reduced_losses = {k: v for k, v in zip(keys, losses)} + + return reduced_losses diff --git a/experiments/Color2Manga_gray/074000_gray.pt b/experiments/Color2Manga_gray/074000_gray.pt new file mode 100644 index 0000000000000000000000000000000000000000..4335c388889d54aa045795b6cdbe5a082eb9e0cf --- /dev/null +++ b/experiments/Color2Manga_gray/074000_gray.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2f4785d00d4463ecb5f02d79f79f9da747a57179b5b016408e65da0e4f62572 +size 1091510163 diff --git a/experiments/Color2Manga_sketch/116000_sketch.pt b/experiments/Color2Manga_sketch/116000_sketch.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b96fce65631be8de9e676fcd3c7bdd3b0e4b67e --- /dev/null +++ b/experiments/Color2Manga_sketch/116000_sketch.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52505452ec908ffd1ae4499a205a55263a8cd7d7bdf4623b59edccf8e8636d33 +size 1091510163 diff --git a/experiments/Discriminator/074000_d.pt b/experiments/Discriminator/074000_d.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc86e1e38488a3face4590275fdd33ef29d66b79 --- /dev/null +++ b/experiments/Discriminator/074000_d.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e622a55cb8bae33377e85963eaa496d7e9bd9e1f4449b853d41235729cc7d40f +size 33261919 diff --git a/experiments/Discriminator/116000_d.pt b/experiments/Discriminator/116000_d.pt new file mode 100644 index 0000000000000000000000000000000000000000..efe1622e011cb3e5f15089f5ff5e0b590d600505 --- /dev/null +++ b/experiments/Discriminator/116000_d.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:169ae73ef7c788ec3921c918d0a9ebdecc4115492b177dfd98660b7816d6ce5a +size 33261983 diff --git a/experiments/VGG19/vgg19-dcbb9e9d.pth b/experiments/VGG19/vgg19-dcbb9e9d.pth new file mode 100644 index 0000000000000000000000000000000000000000..25ba4fbc5cd85f42da7dfd91c22193c91776512f --- /dev/null +++ b/experiments/VGG19/vgg19-dcbb9e9d.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcbb9e9dad569fff7a846263a77324fc34978fea2bfb039c012d710e1776ae44 +size 574673361 diff --git a/extractor/Open-Sans-Bold.ttf b/extractor/Open-Sans-Bold.ttf new file mode 100644 index 0000000000000000000000000000000000000000..fd79d43bea0293ac1b20e8aca1142627983d2c07 Binary files /dev/null and b/extractor/Open-Sans-Bold.ttf differ diff --git a/extractor/__pycache__/manga_panel_extractor.cpython-310.pyc b/extractor/__pycache__/manga_panel_extractor.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4be457b21195296fb3fa3abe18d233f0263c295a Binary files /dev/null and b/extractor/__pycache__/manga_panel_extractor.cpython-310.pyc differ diff --git a/extractor/__pycache__/manga_panel_extractor.cpython-38.pyc b/extractor/__pycache__/manga_panel_extractor.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b9f7a1e6ee9fb8dbc1026961ac4d921918dc9ad1 Binary files /dev/null and b/extractor/__pycache__/manga_panel_extractor.cpython-38.pyc differ diff --git a/extractor/manga_panel_extractor.py b/extractor/manga_panel_extractor.py new file mode 100644 index 
0000000000000000000000000000000000000000..a32438e969e9ecf27a9246455c1cccb7be677d61 --- /dev/null +++ b/extractor/manga_panel_extractor.py @@ -0,0 +1,174 @@ +# stdlib +import argparse +from argparse import RawTextHelpFormatter +import os +from os.path import splitext, basename, exists, join +from os import makedirs +# 3p +from tqdm import tqdm +import numpy as np +from skimage import measure +from PIL import Image +from PIL import ImageFont +from PIL import ImageDraw +import cv2 +# project +from utils import get_files, load_image +from skimage import io + + +class PanelExtractor: + def __init__(self, min_pct_panel=2, max_pct_panel=90, paper_th=0.35): + assert min_pct_panel < max_pct_panel, "Minimum percentage must be smaller than maximum percentage" + self.min_panel = min_pct_panel / 100 + self.max_panel = max_pct_panel / 100 + self.paper_th = paper_th + + def _generate_panel_blocks(self, img): + img = img if len(img.shape) == 2 else img[:, :, 0] + blur = cv2.GaussianBlur(img, (5, 5), 0) + thresh = cv2.threshold(blur, 230, 255, cv2.THRESH_BINARY)[1] + cv2.rectangle(thresh, (0, 0), tuple(img.shape[::-1]), (0, 0, 0), 10) + num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, 4, cv2.CV_32S) + ind = np.argsort(stats[:, 4], )[::-1][1] + panel_block_mask = ((labels == ind) * 255).astype("uint8") + # Image.fromarray(panel_block_mask).show() + return panel_block_mask + + def generate_panels(self, img): + block_mask = self._generate_panel_blocks(img) + cv2.rectangle(block_mask, (0, 0), tuple(block_mask.shape[::-1]), (255, 255, 255), 10) + # Image.fromarray(block_mask).show() + + # detect contours + contours, hierarchy = cv2.findContours(block_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + panels = [] + masks = [] + panel_masks = [] + # print(len(contours)) + + for i in range(len(contours)): + area = cv2.contourArea(contours[i]) + img_area = img.shape[0] * img.shape[1] + + # if the contour is very small or very big, it's likely wrongly detected + 
if area < (self.min_panel * img_area) or area > (self.max_panel * img_area): + continue + + x, y, w, h = cv2.boundingRect(contours[i]) + masks.append(cv2.boundingRect(contours[i])) + # create panel mask + panel_mask = np.ones_like(block_mask, "int32") + cv2.fillPoly(panel_mask, [contours[i].astype("int32")], color=(0, 0, 0)) + # Image.fromarray(panel_mask).show() + panel_mask = panel_mask[y:y + h, x:x + w].copy() + # Image.fromarray(panel_mask).show() + + # apply panel mask + panel = img[y:y + h, x:x + w].copy() + # Image.fromarray(panel).show() + panel[panel_mask == 1] = 255 + # Image.fromarray(panel).show() + + panels.append(panel) + panel_masks.append(panel_mask) + + return panels, masks, panel_masks + + def extract(self, folder): + print("Loading images ... ", end="") + # image_list, _, _ = get_files(folder) + image_list = [] + image_list.append(folder) + imgs = [load_image(x) for x in image_list] + print("Done!") + + folder = os.path.dirname(folder) + # create panels dir + if not exists(join(folder, "panels")): + makedirs(join(folder, "panels")) + folder = join(folder, "panels") + + # remove images with paper texture, not well segmented + paperless_imgs = [] + for img in tqdm(imgs, desc="Removing images with paper texture"): + hist, bins = np.histogram(img.copy().ravel(), 256, [0, 256]) + if np.sum(hist[50:200]) / np.sum(hist) < self.paper_th: + paperless_imgs.append(img) + + if not paperless_imgs: + return imgs, [], [] + for i, img in tqdm(enumerate(paperless_imgs), desc="extracting panels"): + panels, masks, panel_masks = self.generate_panels(img) + name, ext = splitext(basename(image_list[i])) + for j, panel in enumerate(panels): + cv2.imwrite(join(folder, f'{name}_{j}.{ext}'), panel) + + # show the order of colorized panels + img = Image.fromarray(img) + draw = ImageDraw.Draw(img) + font = ImageFont.truetype('extractor/Open-Sans-Bold.ttf', 160) + + def flatten(l): + for el in l: + if isinstance(el, list): + yield from flatten(el) + else: + yield el + + for 
i, bbox in enumerate(flatten(masks), start=1): + w, h = draw.textsize(str(i), font=font) + y = (bbox[1] + bbox[3] / 2 - h / 2) + x = (bbox[0] + bbox[2] / 2 - w / 2) + draw.text((x, y), str(i), (255, 215, 0), font=font) + img.show() + return panels, masks, panel_masks + + def concatPanels(self, img_file, fake_imgs, masks, panel_masks): + img = io.imread(img_file) + # out_imgs.append(f"D:\MyProject\Python\DL_learning\Manga-Panel-Extractor-master\out\in0_ref0.png") + # out_imgs.append(f"D:\MyProject\Python\DL_learning\Manga-Panel-Extractor-master\out\in1_ref1.png") + # out_imgs.append(f"D:\MyProject\Python\DL_learning\Manga-Panel-Extractor-master\out\in2_ref2.png") + for i in range(len(fake_imgs)): + x, y, w, h = masks[i] + # fake_img = io.imread(fake_imgs[i]) + # fake_img = np.array(fake_img) + fake_img = fake_imgs[i] + panel_mask = panel_masks[i] + img[y:y + h, x:x + w][panel_mask == 0] = fake_img[panel_mask == 0] + # Image.fromarray(img).show() + out_folder = os.path.dirname(img_file) + out_name = os.path.basename(img_file) + out_name = os.path.splitext(out_name)[0] + out_img_path = os.path.join(out_folder,'color',f'{out_name}_color.png') + + # show image + Image.fromarray(img).show() + # save image + folder_path = os.path.join(out_folder, 'color') + if not os.path.exists(folder_path): + os.mkdir(folder_path) + io.imsave(out_img_path, img) + + +def main(args): + panel_extractor = PanelExtractor(min_pct_panel=args.min_panel, max_pct_panel=args.max_panel) + panels, masks, panel_masks = panel_extractor.extract(args.folder) + panel_extractor.concatPanels(args.folder, [], masks, panel_masks) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Implementation of a Manga Panel Extractor and dialogue bubble text eraser.", + formatter_class=RawTextHelpFormatter + ) + parser.add_argument("-minp", "--min_panel", type=int, choices=range(1, 99), default=5, metavar="[1-99]", + help="Percentage of minimum panel area in relation to total page area.") 
+ parser.add_argument("-maxp", "--max_panel", type=int, choices=range(1, 99), default=90, metavar="[1-99]", + help="Percentage of minimum panel area in relation to total page area.") + parser.add_argument("-f", '--folder', default='./images/002.png', type=str, + help="""folder path to input manga pages. +Panels will be saved to a directory named `panels` in this folder.""") + + args = parser.parse_args() + main(args) diff --git a/inference.py b/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..adefe0ed5a6c6bd3f8d069413eda28bef17b8c6e --- /dev/null +++ b/inference.py @@ -0,0 +1,229 @@ +import os +import numpy as np +from skimage import color, io + +import torch +import torch.nn.functional as F + +from PIL import Image +from models import ColorEncoder, ColorUNet +from extractor.manga_panel_extractor import PanelExtractor +import argparse + +os.environ["CUDA_VISIBLE_DEVICES"] = '0' + +def mkdirs(path): + if not os.path.exists(path): + os.makedirs(path) + +def Lab2RGB_out(img_lab): + img_lab = img_lab.detach().cpu() + img_l = img_lab[:,:1,:,:] + img_ab = img_lab[:,1:,:,:] + # print(torch.max(img_l), torch.min(img_l)) + # print(torch.max(img_ab), torch.min(img_ab)) + img_l = img_l + 50 + pred_lab = torch.cat((img_l, img_ab), 1)[0,...].numpy() + # grid_lab = utils.make_grid(pred_lab, nrow=1).numpy().astype("float64") + # print(grid_lab.shape) + out = (np.clip(color.lab2rgb(pred_lab.transpose(1, 2, 0)), 0, 1)* 255).astype("uint8") + return out + +def RGB2Lab(inputs): + return color.rgb2lab(inputs) + +def Normalize(inputs): + l = inputs[:, :, 0:1] + ab = inputs[:, :, 1:3] + l = l - 50 + lab = np.concatenate((l, ab), 2) + + return lab.astype('float32') + +def numpy2tensor(inputs): + out = torch.from_numpy(inputs.transpose(2,0,1)) + return out + +def tensor2numpy(inputs): + out = inputs[0,...].detach().cpu().numpy().transpose(1,2,0) + return out + +def preprocessing(inputs): + # input: rgb, [0, 255], uint8 + img_lab = 
def _colorize(gray_img, ref_img, colorEncoder, colorUNet, imgsize, device):
    """Colorize one PIL image with the colors of a reference PIL image.

    Both networks run at `imgsize` x `imgsize`; the predicted ab channels are
    resized back to the input's own size and combined with its full-resolution
    L channel. Returns the uint8 RGB array produced by `Lab2RGB_out`.
    """
    width, height = gray_img.size
    _, gray_lab = preprocessing(gray_img)
    ref_rgb, _ = preprocessing(ref_img)
    gray_lab = gray_lab.to(device)
    ref_rgb = ref_rgb.to(device)

    with torch.no_grad():
        ref_resized = F.interpolate(ref_rgb / 255., size=(imgsize, imgsize), mode='bilinear',
                                    recompute_scale_factor=False, align_corners=False)
        l_resized = F.interpolate(gray_lab[:, :1, :, :] / 50., size=(imgsize, imgsize), mode='bilinear',
                                  recompute_scale_factor=False, align_corners=False)

        color_vector = colorEncoder(ref_resized)

        fake_ab = colorUNet((l_resized, color_vector))
        fake_ab = F.interpolate(fake_ab * 110, size=(height, width), mode='bilinear',
                                recompute_scale_factor=False, align_corners=False)

        fake_lab = torch.cat((gray_lab[:, :1, :, :], fake_ab), 1)
        return Lab2RGB_out(fake_lab)


def _show_and_save(rgb_img, img_path):
    """Show `rgb_img` and write it to `<dir of img_path>/color/<stem>_color.png`."""
    out_folder = os.path.join(os.path.dirname(img_path), 'color')
    out_name = os.path.splitext(os.path.basename(img_path))[0]
    out_img_path = os.path.join(out_folder, f'{out_name}_color.png')

    # show image
    Image.fromarray(rgb_img).show()
    # save image; exist_ok avoids the check-then-create race of exists() + mkdir()
    os.makedirs(out_folder, exist_ok=True)
    io.imsave(out_img_path, rgb_img)


if __name__ == "__main__":
    # Interactive colorization: load the checkpoint once, then repeatedly ask for a
    # page and its reference image(s), colorize, show and save the result.
    device = "cuda"

    ckpt_path = 'experiments/Color2Manga_gray/074000_gray.pt'
    test_dir_path = 'test_datasets/gray_test'
    imgsize = 256

    parser = argparse.ArgumentParser()

    parser.add_argument("--path", type=str, default=None, help="path of input image")
    parser.add_argument("--size", type=int, default=None)
    parser.add_argument("--ckpt", type=str, default=None, help="path of model weight")
    parser.add_argument("-ne", "--no_extractor", action='store_true',
                        help="Do not segment the manga panels.")

    args = parser.parse_args()

    # Bug fix: --path and --ckpt used to be cross-assigned (--path replaced the
    # checkpoint path and --ckpt replaced the image directory), contradicting
    # their help texts.
    if args.path:
        test_dir_path = args.path
    if args.size:
        imgsize = args.size
    if args.ckpt:
        ckpt_path = args.ckpt
    no_extractor = args.no_extractor

    # Load onto CPU storage first; the modules are moved to `device` below.
    ckpt = torch.load(ckpt_path, map_location=lambda storage, loc: storage)

    colorEncoder = ColorEncoder().to(device)
    colorEncoder.load_state_dict(ckpt["colorEncoder"])
    colorEncoder.eval()

    colorUNet = ColorUNet().to(device)
    colorUNet.load_state_dict(ckpt["colorUNet"])
    colorUNet.eval()

    while True:
        print(f'make sure both manga image and reference images are under this path{test_dir_path}')
        img_path = input("please input the name of image needed to be colorized(with file extension): ")
        img_path = os.path.join(test_dir_path, img_path)

        if no_extractor:
            # Whole page colorized from a single reference, no panel segmentation.
            ref_img_path = os.path.join(test_dir_path, input(f"{1}/{1} reference image:"))
            img1 = Image.open(img_path).convert("RGB")
            img2 = Image.open(ref_img_path).convert("RGB")
            fake_img = _colorize(img1, img2, colorEncoder, colorUNet, imgsize, device)
            _show_and_save(fake_img, img_path)
        else:
            # extract panels from manga
            panel_extractor = PanelExtractor(min_pct_panel=5, max_pct_panel=90)
            panels, masks, panel_masks = panel_extractor.extract(img_path)
            panel_num = len(panels)

            print("Please enter the name of the reference image in order according to the number prompts on the picture")
            ref_img_paths = []
            for i in range(panel_num):
                ref_img_path = os.path.join(test_dir_path, input(f"{i+1}/{panel_num} reference image:"))
                ref_img_paths.append(ref_img_path)

            fake_imgs = []
            for panel, ref_img_path in zip(panels, ref_img_paths):
                img1 = Image.fromarray(panel).convert("RGB")
                img2 = Image.open(ref_img_path).convert("RGB")
                fake_imgs.append(_colorize(img1, img2, colorEncoder, colorUNet, imgsize, device))

            if panel_num == 1:
                _show_and_save(fake_imgs[0], img_path)
            else:
                panel_extractor.concatPanels(img_path, fake_imgs, masks, panel_masks)

        # Bug fix: the directory and 'color' used to be concatenated without a
        # separator; report the folder the image was actually written to.
        out_dir = os.path.join(os.path.dirname(img_path), 'color')
        print(f'colored image has been put to: {out_dir}')
class UpBlock(nn.Module):
    """Decoder stage: upsample, fuse a thinned skip connection, modulate, add a shortcut.

    The channel arithmetic below expects the upsampled tensor to carry
    `in_channels // 2` channels and the skip tensor to contribute
    `in_channels // 8` channels after every fourth channel is kept.
    """

    def __init__(self, color_dim, in_channels, out_channels, kernel_size = 3, bilinear=True):
        super().__init__()
        half_channels = in_channels // 2
        skip_channels = in_channels // 8

        # Bilinear upsampling keeps the channel count; the transposed convolution
        # halves it while doubling the resolution.
        self.up = (
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
            if bilinear
            else nn.ConvTranspose2d(in_channels, half_channels, kernel_size=2, stride=2)
        )

        self.conv_cat = nn.Sequential(
            nn.Conv2d(half_channels + skip_channels, out_channels, kernel_size=1, stride=1, padding=0),
            nn.LeakyReLU(negative_slope=0.2, inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=0.2, inplace=True),
        )

        # 1x1 projection of the upsampled input used as the residual branch.
        self.conv_s = nn.Conv2d(half_channels, out_channels, kernel_size=1, stride=1, padding=0)

        # Color-conditioned convolution applied to the fused features.
        self.SDFT = SDFT(color_dim, out_channels, kernel_size)

    def forward(self, x1, x2, color_style):
        """Upsample `x1`, merge it with `x2`, modulate by `color_style`, add the shortcut."""
        upsampled = self.up(x1)
        shortcut = self.conv_s(upsampled)

        thinned_skip = x2[:, ::4, :, :]  # keep every fourth channel of the skip tensor
        fused = self.conv_cat(torch.cat([upsampled, thinned_skip], dim=1))

        return self.SDFT(fused, color_style) + shortcut
class ColorUNet(nn.Module):
    """U-Net that predicts the two ab channels from an L image and a color vector.

    `n_classes` is stored as an attribute but does not size the output head,
    which always emits 2 channels squashed to [-1, 1] by `Tanh`.
    """

    def __init__(self, n_channels=1, n_classes=3, bilinear=True):
        super().__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        # Encoder
        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)

        # Decoder: every stage is conditioned on a 512-dim color vector.
        self.up1 = UpBlock(color_dim=512, in_channels=1024, out_channels=512 // factor,
                           kernel_size=3, bilinear=bilinear)
        self.up2 = UpBlock(color_dim=512, in_channels=512, out_channels=256 // factor,
                           kernel_size=3, bilinear=bilinear)
        self.up3 = UpBlock(color_dim=512, in_channels=256, out_channels=128 // factor,
                           kernel_size=5, bilinear=bilinear)
        self.up4 = UpBlock(color_dim=512, in_channels=128, out_channels=64,
                           kernel_size=5, bilinear=bilinear)

        self.outc = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=0.2, inplace=True),
            nn.Conv2d(64, 2, kernel_size=3, stride=1, padding=1),
            nn.Tanh(),  # [-1, 1]
        )

    def forward(self, x):
        """Run the network on `x = (L image, color vector)` and return the ab map.

        Shape notes are for a 256x256 input with `bilinear=True`.
        """
        gray_l, x_color = x[0], x[1]  # x_color: [B, 512, 1, 1]

        enc1 = self.inc(gray_l)    # [B, 64, 256, 256]
        enc2 = self.down1(enc1)    # [B, 128, 128, 128]
        enc3 = self.down2(enc2)    # [B, 256, 64, 64]
        enc4 = self.down3(enc3)    # [B, 512, 32, 32]
        enc5 = self.down4(enc4)    # [B, 512, 16, 16]

        dec = self.up1(enc5, enc4, x_color)  # [B, 256, 32, 32]
        dec = self.up2(dec, enc3, x_color)   # [B, 128, 64, 64]
        dec = self.up3(dec, enc2, x_color)   # [B, 64, 128, 128]
        dec = self.up4(dec, enc1, x_color)   # [B, 64, 256, 256]

        return self.outc(dec)
https://git-lfs.github.com/spec/v1 +oid sha256:bbdab465b40ab86b3e0e68d597e336456a3e9921c658e428ebfe543ddc4426ae +size 9689133 diff --git a/real_manga/class1/Color 5570824.jpg b/real_manga/class1/Color 5570824.jpg new file mode 100644 index 0000000000000000000000000000000000000000..db38d40bc84dd2c82b31b6e2b961d46456d933d4 --- /dev/null +++ b/real_manga/class1/Color 5570824.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f56c6d28bd482e9eb58c3f069d99c17f8e8b60c0669de8070d638f5c921547d6 +size 10744357 diff --git a/real_manga/class1/Color 5674950.jpg b/real_manga/class1/Color 5674950.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5013656d6d2b238c1949437d348d8c5f7c752de5 --- /dev/null +++ b/real_manga/class1/Color 5674950.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d75ce71d21323da983c95a24409bf23485484d52d293874eb45ab045376baaa +size 10335164 diff --git a/real_manga/class1/Color 5828407151952509.jpg b/real_manga/class1/Color 5828407151952509.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9e18d9607f1ad5af2caa42abec5e585d570b3029 --- /dev/null +++ b/real_manga/class1/Color 5828407151952509.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd598bec0b867950c47415da3d8bbe7c24e0c58bba2df8389e82405032986016 +size 15520424 diff --git a/real_manga/class1/Color 5851155317235124.jpg b/real_manga/class1/Color 5851155317235124.jpg new file mode 100644 index 0000000000000000000000000000000000000000..43073378937e958a06f7af01fb11cf5b61cfe8ab --- /dev/null +++ b/real_manga/class1/Color 5851155317235124.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:addaf37cac0187fbbaf64c9e47f5488ecc8e4ec7f88b8bea4416b80b7aab92ae +size 9540948 diff --git a/real_manga/class1/Color 6429789966786911.jpg b/real_manga/class1/Color 6429789966786911.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..5b4c38b3b62123403def8ad3f89817a3f2e5a895 --- /dev/null +++ b/real_manga/class1/Color 6429789966786911.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28423eb5fe662ee4b981d2428fb53671bbd016bda736fcc56764054f25c9165e +size 13745820 diff --git a/real_manga/class1/Color 6813581942189493.jpg b/real_manga/class1/Color 6813581942189493.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c700b8f70cc4ccf5b20fc6d44482002ad0ae3c7d --- /dev/null +++ b/real_manga/class1/Color 6813581942189493.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4e9170512381d6dba606fa0f26690b480ef09b968b238b4553601742f10a37f +size 9961355 diff --git a/real_manga/class1/Color 8096755.jpg b/real_manga/class1/Color 8096755.jpg new file mode 100644 index 0000000000000000000000000000000000000000..50b1618caeb884c3ed86f43ad3b8c71013de06b4 --- /dev/null +++ b/real_manga/class1/Color 8096755.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96bb25a877294bb1d490f317fd6abaf8224d3a7d872bb9ff009fbbd23c349e24 +size 9537545 diff --git a/requirement.txt b/requirement.txt new file mode 100644 index 0000000000000000000000000000000000000000..05251b13f8846ba22b20e5c1a2c016152352651f --- /dev/null +++ b/requirement.txt @@ -0,0 +1,7 @@ +numpy +scikit-image +opencv-python +torch +torchvision +tqdm +lmdb \ No newline at end of file diff --git a/test_datasets/Boku/03-copia.png b/test_datasets/Boku/03-copia.png new file mode 100644 index 0000000000000000000000000000000000000000..b6cb2a224056ca2bf87de672bd592c6a137678a3 Binary files /dev/null and b/test_datasets/Boku/03-copia.png differ diff --git a/test_datasets/Boku/03.png b/test_datasets/Boku/03.png new file mode 100644 index 0000000000000000000000000000000000000000..268e7d1cbffdf4e3775583364ec6a73ba72283e1 Binary files /dev/null and b/test_datasets/Boku/03.png differ diff --git a/test_datasets/Boku/09.png 
b/test_datasets/Boku/09.png new file mode 100644 index 0000000000000000000000000000000000000000..62855f3611fef01d797206be2df2650da4faf8e6 Binary files /dev/null and b/test_datasets/Boku/09.png differ diff --git a/test_datasets/Boku/9_221522-copia.png b/test_datasets/Boku/9_221522-copia.png new file mode 100644 index 0000000000000000000000000000000000000000..07b55527b4de973550dc85878f52b98fa5cd29aa Binary files /dev/null and b/test_datasets/Boku/9_221522-copia.png differ diff --git a/test_datasets/Boku/9_221522.png b/test_datasets/Boku/9_221522.png new file mode 100644 index 0000000000000000000000000000000000000000..879e5722013b28eccc174d18b9d3e92a3e102b6d Binary files /dev/null and b/test_datasets/Boku/9_221522.png differ diff --git a/test_datasets/gray_test/001_in.png b/test_datasets/gray_test/001_in.png new file mode 100644 index 0000000000000000000000000000000000000000..f4d3f661db3d61e3482c98a22335361a93c73c39 --- /dev/null +++ b/test_datasets/gray_test/001_in.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79dd9a5f356d126ca3aaa6832ccbf6648d47f0874b298b8f10ffba465eda27c1 +size 1556838 diff --git a/test_datasets/gray_test/001_ref_a.png b/test_datasets/gray_test/001_ref_a.png new file mode 100644 index 0000000000000000000000000000000000000000..780e491252032d78fc9cf8cb9f82d9bc9582bef4 Binary files /dev/null and b/test_datasets/gray_test/001_ref_a.png differ diff --git a/test_datasets/gray_test/001_ref_b.png b/test_datasets/gray_test/001_ref_b.png new file mode 100644 index 0000000000000000000000000000000000000000..1910793e9d6d1e67397fd3dbcc0b2c97d34ab914 Binary files /dev/null and b/test_datasets/gray_test/001_ref_b.png differ diff --git a/test_datasets/gray_test/002_in.jpeg b/test_datasets/gray_test/002_in.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..d10fd4cf39ea50e24401a690db6e8888d8d9b5d8 Binary files /dev/null and b/test_datasets/gray_test/002_in.jpeg differ diff --git 
a/test_datasets/gray_test/002_in_ref_a.jpg b/test_datasets/gray_test/002_in_ref_a.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3ab016caf1a7c95aa7626b10f05ed3c99fba642c Binary files /dev/null and b/test_datasets/gray_test/002_in_ref_a.jpg differ diff --git a/test_datasets/gray_test/002_in_ref_b.jpeg b/test_datasets/gray_test/002_in_ref_b.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..bbef66e57f19198a0864d40e25012c82ac23673e --- /dev/null +++ b/test_datasets/gray_test/002_in_ref_b.jpeg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cb83a00b49e8fd8d512a0565e23805bcefa2cdd879f7fa98674131fb14436e7 +size 1280388 diff --git a/test_datasets/gray_test/003_in.jpeg b/test_datasets/gray_test/003_in.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..5bdd40483b25c7bad0046b3096b7b877b4677d07 Binary files /dev/null and b/test_datasets/gray_test/003_in.jpeg differ diff --git a/test_datasets/gray_test/003_in_ref_a.jpg b/test_datasets/gray_test/003_in_ref_a.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0a1aeb75de8cdb9ad35926aec742ba2f2b706341 Binary files /dev/null and b/test_datasets/gray_test/003_in_ref_a.jpg differ diff --git a/test_datasets/gray_test/003_in_ref_b.jpg b/test_datasets/gray_test/003_in_ref_b.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3ab016caf1a7c95aa7626b10f05ed3c99fba642c Binary files /dev/null and b/test_datasets/gray_test/003_in_ref_b.jpg differ diff --git a/test_datasets/gray_test/004_in.png b/test_datasets/gray_test/004_in.png new file mode 100644 index 0000000000000000000000000000000000000000..c4ffa8c5f46046626f2dd19a7cb3525dd3a532e4 --- /dev/null +++ b/test_datasets/gray_test/004_in.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9cd1bda825bb212d5964f2514c1dae65c7324d09d14900bd9c3ad25fb0c542e +size 1784692 diff --git a/test_datasets/gray_test/004_ref_1.jpg 
b/test_datasets/gray_test/004_ref_1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f3590a48fe340c9469d80906d75029ef9a030dac Binary files /dev/null and b/test_datasets/gray_test/004_ref_1.jpg differ diff --git a/test_datasets/gray_test/004_ref_2.jpg b/test_datasets/gray_test/004_ref_2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3c57893bc76f36ce3928789e4ccb5a820dc37c5c Binary files /dev/null and b/test_datasets/gray_test/004_ref_2.jpg differ diff --git a/test_datasets/gray_test/005_in.png b/test_datasets/gray_test/005_in.png new file mode 100644 index 0000000000000000000000000000000000000000..0d9fdd2d2c502855c2734c66fdd5ea18a3f41f79 --- /dev/null +++ b/test_datasets/gray_test/005_in.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52ba974be1b85624bf006a09b6064639b69ce31fa27e29d9ec85186dde09c847 +size 1141424 diff --git a/test_datasets/gray_test/005_ref_1.jpeg b/test_datasets/gray_test/005_ref_1.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..164cba3b5c5dd2d73fde78e17040f97ad8dea268 Binary files /dev/null and b/test_datasets/gray_test/005_ref_1.jpeg differ diff --git a/test_datasets/gray_test/005_ref_2.jpg b/test_datasets/gray_test/005_ref_2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f6545ffcb663a649b8522dedaee607652034e193 Binary files /dev/null and b/test_datasets/gray_test/005_ref_2.jpg differ diff --git a/test_datasets/gray_test/005_ref_3.jpeg b/test_datasets/gray_test/005_ref_3.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..0a7577f50407852b430a2bbab93de1fdc6cbb1e0 Binary files /dev/null and b/test_datasets/gray_test/005_ref_3.jpeg differ diff --git a/test_datasets/gray_test/006_in.png b/test_datasets/gray_test/006_in.png new file mode 100644 index 0000000000000000000000000000000000000000..a108b964135a94b44ef9f746468aa98610cba307 --- /dev/null +++ b/test_datasets/gray_test/006_in.png @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:85ef2390975f4719df20ed6e6c7d744711ff54f0de39d06afbd2b8df4985542b +size 3131682 diff --git a/test_datasets/gray_test/006_ref.png b/test_datasets/gray_test/006_ref.png new file mode 100644 index 0000000000000000000000000000000000000000..1ba8fc597a675609f3716a8731bc1150023951c8 --- /dev/null +++ b/test_datasets/gray_test/006_ref.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6c344c73ec593fcced71da68b3f1f6355da521a1de022f44a63553e97833847 +size 1598956 diff --git a/test_datasets/gray_test/out/001_in_color_a.png b/test_datasets/gray_test/out/001_in_color_a.png new file mode 100644 index 0000000000000000000000000000000000000000..869456909ab670abab5179552c33a38c9251d63b --- /dev/null +++ b/test_datasets/gray_test/out/001_in_color_a.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5245844fb4b215fa304c3dd320ec923d38958cbbf56c3cf06a692adb1c1a64a5 +size 2431760 diff --git a/test_datasets/gray_test/out/001_in_color_b.png b/test_datasets/gray_test/out/001_in_color_b.png new file mode 100644 index 0000000000000000000000000000000000000000..70d6a3f623ef47d552d3dac0729e919aa7385fb6 --- /dev/null +++ b/test_datasets/gray_test/out/001_in_color_b.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:991daf1933443d7cfeaf956b7083e4f537ae7c66e0f6fcc29f9d942a96fc3807 +size 2368191 diff --git a/test_datasets/gray_test/out/002_in_color_a.png b/test_datasets/gray_test/out/002_in_color_a.png new file mode 100644 index 0000000000000000000000000000000000000000..f6ed43931428a7a81d89e25ed84b7920fe48ef88 --- /dev/null +++ b/test_datasets/gray_test/out/002_in_color_a.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33f18f626dcc9e7d5befadd5fcf6166a9089c5dc885f9c53c47b2113db27a118 +size 1196754 diff --git a/test_datasets/gray_test/out/002_in_color_b.png b/test_datasets/gray_test/out/002_in_color_b.png new file mode 100644 index 
0000000000000000000000000000000000000000..a8a6d7877a086a16804ab455e5cf92550fd53b23 --- /dev/null +++ b/test_datasets/gray_test/out/002_in_color_b.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2feeaf5dc5769fc76d2b5b6903b7ede3891c8a872b889bd8d486daa34b1472a5 +size 1247947 diff --git a/test_datasets/gray_test/out/003_in_color_a.png b/test_datasets/gray_test/out/003_in_color_a.png new file mode 100644 index 0000000000000000000000000000000000000000..ab98f155df62c3d1d89444d3cd0bd4d06c5e61dd --- /dev/null +++ b/test_datasets/gray_test/out/003_in_color_a.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16128a133dbef09cb7e44fca56f6d016d3296f748f7c9acbdbc2dd2b48ae2ecc +size 1019111 diff --git a/test_datasets/gray_test/out/003_in_color_b.png b/test_datasets/gray_test/out/003_in_color_b.png new file mode 100644 index 0000000000000000000000000000000000000000..f42c2540e90fd40adcfb04eda8644ebf9baba988 --- /dev/null +++ b/test_datasets/gray_test/out/003_in_color_b.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2035477322f30b281314b5d2e0e0f1f55c24922c0c9cb913b76bead58d0e26c +size 1092948 diff --git a/test_datasets/gray_test/out/004_in_color.png b/test_datasets/gray_test/out/004_in_color.png new file mode 100644 index 0000000000000000000000000000000000000000..3bda4633385ef7b07ce1738d1a9546cbb334eb54 --- /dev/null +++ b/test_datasets/gray_test/out/004_in_color.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a429625908918a8fad029b44dbcfacb91954fd621fee4f4da4c4b4e83286ab0e +size 2199225 diff --git a/test_datasets/gray_test/out/005_in_color.png b/test_datasets/gray_test/out/005_in_color.png new file mode 100644 index 0000000000000000000000000000000000000000..d8834df98d6a9d9153d0feb2a9b4851938ac9fe4 --- /dev/null +++ b/test_datasets/gray_test/out/005_in_color.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9c2691b36e52ddd02ab45d82e0799bd7e20553a7226090861d9780c1b33fa355 +size 1794542 diff --git a/test_datasets/gray_test/out/006_in_color.png b/test_datasets/gray_test/out/006_in_color.png new file mode 100644 index 0000000000000000000000000000000000000000..d1415fd22040350127c7060b53cd6619b397f703 --- /dev/null +++ b/test_datasets/gray_test/out/006_in_color.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3efe9556c7c1af7611e34f8fcf54270d4a84b2674ed2a3d09909d95c680a0f85 +size 3674934 diff --git a/test_datasets/sketch_test/001_in.jpg b/test_datasets/sketch_test/001_in.jpg new file mode 100644 index 0000000000000000000000000000000000000000..10a0f08cd194a9d5281ccf0673f5059cc52a757d Binary files /dev/null and b/test_datasets/sketch_test/001_in.jpg differ diff --git a/test_datasets/sketch_test/001_ref_a.jpg b/test_datasets/sketch_test/001_ref_a.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0a1aeb75de8cdb9ad35926aec742ba2f2b706341 Binary files /dev/null and b/test_datasets/sketch_test/001_ref_a.jpg differ diff --git a/test_datasets/sketch_test/001_ref_b.jpg b/test_datasets/sketch_test/001_ref_b.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9cfe6ef29f369e4200d4ae9a2bb105c1951dec31 Binary files /dev/null and b/test_datasets/sketch_test/001_ref_b.jpg differ diff --git a/test_datasets/sketch_test/out/001_in_color_a.png b/test_datasets/sketch_test/out/001_in_color_a.png new file mode 100644 index 0000000000000000000000000000000000000000..b3cd0d3560deaea2aa8bc2131472a4eca94b49f3 Binary files /dev/null and b/test_datasets/sketch_test/out/001_in_color_a.png differ diff --git a/test_datasets/sketch_test/out/001_in_color_b.png b/test_datasets/sketch_test/out/001_in_color_b.png new file mode 100644 index 0000000000000000000000000000000000000000..4ade7d4e0144a2a5d5436990590aab61f75f2233 Binary files /dev/null and b/test_datasets/sketch_test/out/001_in_color_b.png differ diff --git a/train.py b/train.py 
def data_sampler(dataset, shuffle, distributed):
    """Return the sampler matching the run mode.

    Distributed runs get a `DistributedSampler` (with `shuffle` forwarded);
    otherwise a `RandomSampler` when `shuffle` is truthy, else a
    `SequentialSampler`.
    """
    if distributed:
        sampler_cls = data.distributed.DistributedSampler
        return sampler_cls(dataset, shuffle=shuffle)

    sampler_cls = data.RandomSampler if shuffle else data.SequentialSampler
    return sampler_cls(dataset)
def train(
    args,
    loader,
    colorEncoder,
    colorUNet,
    vggnet,
    g_optim,
    device,
):
    """Jointly train the color encoder and the colorization U-Net.

    Each iteration draws `(img, img_ref, img_lab)` from `loader`, predicts the ab
    channels of `img` from its L channel plus a color vector encoded from
    `img_ref`, and minimizes a smooth-L1 reconstruction loss on ab plus an L1
    feature loss over five `vggnet` feature maps.

    Args:
        args: Parsed options; reads `iter`, `start_iter`, `distributed` and
            `experiment_name`.
        loader: Finite iterable of batches; wrapped so it restarts when exhausted.
        colorEncoder: Reference-image encoder (possibly wrapped, exposing `.module`).
        colorUNet: Colorization network (possibly wrapped, exposing `.module`).
        vggnet: Feature extractor called with `layer_name='all'`.
        g_optim: Optimizer stepping both networks.
        device: Device the batches are moved to.

    Side effects:
        Prints running losses every 50 iterations, writes validation images to
        `training_logs/<experiment_name>/<iteration>/` every 500 iterations and
        checkpoints to `experiments/<experiment_name>/` every 2500 iterations.
    """
    # Endless batch stream: sample_data re-iterates the loader forever.
    loader = sample_data(loader)

    pbar = range(args.iter)

    # Only rank 0 gets a progress bar; other ranks iterate the plain range.
    if get_rank() == 0:
        pbar = tqdm(pbar, initial=args.start_iter, dynamic_ncols=True, smoothing=0.01)

    g_loss_val = 0  # never read in this function
    loss_dict = {}
    # Running sums of the logged losses, reset every 50 iterations.
    recon_val_all = 0
    fea_val_all = 0

    # Checkpoints store the unwrapped modules' weights.
    if args.distributed:
        colorEncoder_module = colorEncoder.module
        colorUNet_module = colorUNet.module

    else:
        colorEncoder_module = colorEncoder
        colorUNet_module = colorUNet

    for idx in pbar:
        # 1-based global iteration, offset by the resume point.
        i = idx + args.start_iter+1

        if i > args.iter:
            print("Done!")

            break

        img, img_ref, img_lab = next(loader)

        # ima = img_ref.numpy()
        # ima = ima[0].astype('uint8')
        # ima = Image.fromarray(ima.transpose(1,2,0))
        # ima.show()

        img = img.to(device) # GT [B, 3, 256, 256]
        img_lab = img_lab.to(device) # GT

        img_ref = img_ref.to(device) # tps_transformed image RGB [B, 3, 256, 256]

        img_l = img_lab[:,:1,:,:] / 50 # [-1, 1] target L
        img_ab = img_lab[:,1:,:,:] / 110 # [-1, 1] target ab
        # img_ref_ab = img_ref_lab[:,1:,:,:] / 110 # [-1, 1] ref ab

        # Validation below switches to eval(), so train mode is restored each iteration.
        colorEncoder.train()
        colorUNet.train()

        requires_grad(colorEncoder, True)
        requires_grad(colorUNet, True)

        ref_color_vector = colorEncoder(img_ref / 255.)

        fake_swap_ab = colorUNet((img_l, ref_color_vector)) # [-1, 1]

        ## recon l1 loss
        recon_loss = (F.smooth_l1_loss(fake_swap_ab, img_ab)) * 1

        ## feature loss
        real_img_rgb = img / 255.
        features_A = vggnet(real_img_rgb, layer_name='all')

        # Undo the normalization (L: *50+50, ab: *110) before converting to RGB.
        # NOTE(review): tensor_lab2rgb is imported from utils; the [0, 1] output
        # range is taken from the original annotation - confirm.
        fake_swap_rgb = tensor_lab2rgb(torch.cat((img_l*50+50, fake_swap_ab*110), 1)) # [0, 1]
        features_B = vggnet(fake_swap_rgb, layer_name='all')
        # fea_loss = F.l1_loss(features_A[-1], features_B[-1]) * 0.1
        # fea_loss = 0

        # Per-feature L1 terms; earlier entries are down-weighted (1/32 ... 1), all scaled by 0.1.
        fea_loss1 = F.l1_loss(features_A[0], features_B[0]) / 32 * 0.1
        fea_loss2 = F.l1_loss(features_A[1], features_B[1]) / 16 * 0.1
        fea_loss3 = F.l1_loss(features_A[2], features_B[2]) / 8 * 0.1
        fea_loss4 = F.l1_loss(features_A[3], features_B[3]) / 4 * 0.1
        fea_loss5 = F.l1_loss(features_A[4], features_B[4]) * 0.1

        fea_loss = fea_loss1 + fea_loss2 + fea_loss3 + fea_loss4 + fea_loss5

        loss_dict["recon"] = recon_loss

        loss_dict["fea"] = fea_loss

        g_optim.zero_grad()
        (recon_loss+fea_loss).backward()
        g_optim.step()

        # NOTE(review): reduce_loss_dict comes from the distributed module;
        # presumably it combines the losses across ranks for logging - confirm.
        loss_reduced = reduce_loss_dict(loss_dict)


        recon_val = loss_reduced["recon"].mean().item()
        recon_val_all += recon_val
        # recon_val = 0
        fea_val = loss_reduced["fea"].mean().item()
        fea_val_all += fea_val
        # fea_val = 0

        # NOTE(review): the source this was recovered from had lost its
        # indentation; nesting the periodic blocks below under the rank-0 check
        # is inferred - confirm against the original file.
        if get_rank() == 0:
            pbar.set_description(
                (
                    f"recon:{recon_val:.4f}; fea:{fea_val:.4f};"
                )
            )


            # Report the mean of the last 50 iterations, then restart the running sums.
            if i % 50 == 0:
                print(f"recon_all:{recon_val_all/50:.4f}; fea_all:{fea_val_all/50:.4f};")
                recon_val_all = 0
                fea_val_all = 0

            # Periodic validation on 15 fixed input/reference pairs.
            if i % 500 == 0:
                with torch.no_grad():
                    colorEncoder.eval()
                    colorUNet.eval()

                    imgsize = 256
                    for inum in range(15):
                        val_img_path = 'test_datasets/val_Manga/in%d.jpg' % (inum + 1)
                        val_ref_path = 'test_datasets/val_Manga/ref%d.jpg' % (inum + 1)
                        # val_img_path = 'test_datasets/val_daytime/day_sample/in%d.jpg'%(inum+1)
                        # val_ref_path = 'test_datasets/val_daytime/night_sample/dark4.jpg'
                        out_name = 'in%d_ref%d.png'%(inum+1, inum+1)
                        val_img = Image.open(val_img_path).convert("RGB").resize((imgsize, imgsize))
                        val_img_ref = Image.open(val_ref_path).convert("RGB").resize((imgsize, imgsize))
                        val_img, val_img_lab = preprocessing(val_img)
                        val_img_ref, val_img_ref_lab = preprocessing(val_img_ref)

                        # val_img = val_img.to(device)
                        val_img_lab = val_img_lab.to(device)
                        val_img_ref = val_img_ref.to(device)
                        # val_img_ref_lab = val_img_ref_lab.to(device)

                        val_img_l = val_img_lab[:,:1,:,:] / 50. # [-1, 1]
                        # val_img_ref_ab = val_img_ref_lab[:,1:,:,:] / 110. # [-1, 1]

                        ref_color_vector = colorEncoder(val_img_ref / 255.) # [0, 1]
                        fake_swap_ab = colorUNet((val_img_l, ref_color_vector))

                        # L stays centered here; Lab2RGB_out adds the +50 offset itself.
                        fake_img = torch.cat((val_img_l*50, fake_swap_ab*110), 1)

                        # Input, reference and colorized output joined along axis 1 of the HWC arrays.
                        sample = np.concatenate((tensor2numpy(val_img), tensor2numpy(val_img_ref), Lab2RGB_out(fake_img)), 1)

                        out_dir = 'training_logs/%s/%06d'%(args.experiment_name, i)
                        mkdirss(out_dir)
                        io.imsave('%s/%s'%(out_dir, out_name), sample.astype('uint8'))
                    torch.cuda.empty_cache()
            # Periodic checkpoint: both networks, the optimizer state and the run arguments.
            if i % 2500 == 0:
                out_dir = "experiments/%s"%(args.experiment_name)
                mkdirss(out_dir)
                torch.save(
                    {
                        "colorEncoder": colorEncoder_module.state_dict(),
                        "colorUNet": colorUNet_module.state_dict(),
                        "g_optim": g_optim.state_dict(),
                        "args": args,
                    },
                    # The f-string fills in the zero-padded iteration first, then % inserts out_dir.
                    f"%s/{str(i).zfill(6)}.pt"%(out_dir),
                )
int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 + args.distributed = n_gpu > 1 + + if args.distributed: + torch.cuda.set_device(args.local_rank) + torch.distributed.init_process_group(backend="nccl", init_method="env://") + synchronize() + + args.start_iter = 0 + + vggnet = vgg19(pretrained_path = './experiments/VGG19/vgg19-dcbb9e9d.pth', require_grad = False) + vggnet = vggnet.to(device) + vggnet.eval() + + colorEncoder = ColorEncoder(color_dim=512).to(device) + colorUNet = ColorUNet(bilinear=True).to(device) + + + g_optim = optim.Adam( + list(colorEncoder.parameters()) + list(colorUNet.parameters()), + lr=args.lr, + betas=(0.9, 0.99), + ) + + if args.ckpt is not None: + print("load model:", args.ckpt) + + ckpt = torch.load(args.ckpt, map_location=lambda storage, loc: storage) + + try: + ckpt_name = os.path.basename(args.ckpt) + args.start_iter = int(os.path.splitext(ckpt_name)[0]) + + except ValueError: + pass + + colorEncoder.load_state_dict(ckpt["colorEncoder"]) + colorUNet.load_state_dict(ckpt["colorUNet"]) + g_optim.load_state_dict(ckpt["g_optim"]) + + # print(args.distributed) + + if args.distributed: + colorEncoder = nn.parallel.DistributedDataParallel( + colorEncoder, + device_ids=[args.local_rank], + output_device=args.local_rank, + broadcast_buffers=False, + ) + + colorUNet = nn.parallel.DistributedDataParallel( + colorUNet, + device_ids=[args.local_rank], + output_device=args.local_rank, + broadcast_buffers=False, + ) + + + transform = transforms.Compose( + [ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(0, 360)) + ] + ) + + datasets = [] + dataset = MultiResolutionDataset(args.datasets, transform, args.size) + datasets.append(dataset) + + loader = data.DataLoader( + data.ConcatDataset(datasets), + batch_size=args.batch, + sampler=data_sampler(dataset, shuffle=True, distributed=args.distributed), + drop_last=True, + ) + + train( + args, + loader, + colorEncoder, + 
colorUNet, + vggnet, + g_optim, + device, + ) + diff --git a/train_all_gray.py b/train_all_gray.py new file mode 100644 index 0000000000000000000000000000000000000000..a6a26e708b27399eba0a9df87e108212f342f2ec --- /dev/null +++ b/train_all_gray.py @@ -0,0 +1,453 @@ +import argparse + +import os +import re + +import numpy as np +from PIL import Image +from skimage import color, io +import torch +from torch import nn, optim +from torch.nn import functional as F +from torch.utils import data +from torchvision import transforms +from tqdm import tqdm +from torch.autograd import Variable + +# from ColorEncoder import ColorEncoder +from models import ColorEncoder, ColorUNet +from vgg_model import vgg19 +from discriminator import Discriminator +from data.data_loader import MultiResolutionDataset + +from utils import tensor_lab2rgb + +from distributed import ( + get_rank, + synchronize, + reduce_loss_dict, +) + + +def mkdirss(dirpath): + if not os.path.exists(dirpath): + os.makedirs(dirpath) + + +def data_sampler(dataset, shuffle, distributed): + if distributed: + return data.distributed.DistributedSampler(dataset, shuffle=shuffle) + + if shuffle: + return data.RandomSampler(dataset) + + else: + return data.SequentialSampler(dataset) + + +def requires_grad(model, flag=True): + for p in model.parameters(): + p.requires_grad = flag + + +def sample_data(loader): + while True: + for batch in loader: + yield batch + + +def Lab2RGB_out(img_lab): + img_lab = img_lab.detach().cpu() + img_l = img_lab[:, :1, :, :] + img_ab = img_lab[:, 1:, :, :] + # print(torch.max(img_l), torch.min(img_l)) + # print(torch.max(img_ab), torch.min(img_ab)) + img_l = img_l + 50 + pred_lab = torch.cat((img_l, img_ab), 1)[0, ...].numpy() + # grid_lab = utils.make_grid(pred_lab, nrow=1).numpy().astype("float64") + # print(grid_lab.shape) + out = (np.clip(color.lab2rgb(pred_lab.transpose(1, 2, 0)), 0, 1) * 255).astype("uint8") + return out + + +def RGB2Lab(inputs): + # input [0, 255] uint8 + # out l: [0, 
100], ab: [-110, 110], float32 + return color.rgb2lab(inputs) + + +def Normalize(inputs): + l = inputs[:, :, 0:1] + ab = inputs[:, :, 1:3] + l = l - 50 + lab = np.concatenate((l, ab), 2) + + return lab.astype('float32') + + +def numpy2tensor(inputs): + out = torch.from_numpy(inputs.transpose(2, 0, 1)) + return out + + +def tensor2numpy(inputs): + out = inputs[0, ...].detach().cpu().numpy().transpose(1, 2, 0) + return out + + +def preprocessing(inputs): + # input: rgb, [0, 255], uint8 + img_lab = Normalize(RGB2Lab(inputs)) + img = np.array(inputs, 'float32') # [0, 255] + img = numpy2tensor(img) + img_lab = numpy2tensor(img_lab) + return img.unsqueeze(0), img_lab.unsqueeze(0) + + +def uncenter_l(inputs): + l = inputs[:, :1, :, :] + 50 + ab = inputs[:, 1:, :, :] + return torch.cat((l, ab), 1) + + +def train( + args, + loader, + colorEncoder, + colorUNet, + discriminator, + vggnet, + g_optim, + d_optim, + device, +): + loader = sample_data(loader) + + pbar = range(args.iter) + + if get_rank() == 0: + pbar = tqdm(pbar, initial=args.start_iter, dynamic_ncols=True, smoothing=0.01) + + g_loss_val = 0 + loss_dict = {} + recon_val_all = 0 + fea_val_all = 0 + disc_val_all = 0 + disc_val_GAN_all = 0 + disc_val = 0 + count = 0 + criterion_GAN = torch.nn.MSELoss().to(device) + + # Calculate output of image discriminator (PatchGAN) + patch = (1, args.size // 2 ** 4, args.size // 2 ** 4) + Tensor = torch.cuda.FloatTensor if device == 'cuda' else torch.FloatTensor + + colorEncoder_module = colorEncoder + colorUNet_module = colorUNet + + for idx in pbar: + i = idx + args.start_iter+1 + + if i > args.iter: + print("Done!") + + break + + img, img_ref, img_lab = next(loader) + + # Adversarial ground truths + valid = Variable(Tensor(np.ones((img.size(0), *patch))), requires_grad=False) + fake = Variable(Tensor(np.zeros((img.size(0), *patch))), requires_grad=False) + + img = img.to(device) # GT [B, 3, 256, 256] + img_lab = img_lab.to(device) # GT + + img_ref = img_ref.to(device) # 
tps_transformed image RGB [B, 3, 256, 256] + + img_l = img_lab[:, :1, :, :] / 50 # [-1, 1] target L + img_ab = img_lab[:, 1:, :, :] / 110 # [-1, 1] target ab + # img_ref_ab = img_ref_lab[:,1:,:,:] / 110 # [-1, 1] ref ab + + colorEncoder.train() + colorUNet.train() + discriminator.train() + + requires_grad(colorEncoder, True) + requires_grad(colorUNet, True) + requires_grad(discriminator, True) + + # ------------------ + # Train Generators + # ------------------ + + ref_color_vector = colorEncoder(img_ref / 255.) + + fake_swap_ab = colorUNet((img_l, ref_color_vector)) # [-1, 1] + + ## recon l1 loss + recon_loss = (F.smooth_l1_loss(fake_swap_ab, img_ab)) + + ## feature loss + real_img_rgb = img / 255. + features_A = vggnet(real_img_rgb, layer_name='all') + + fake_swap_rgb = tensor_lab2rgb(torch.cat((img_l * 50 + 50, fake_swap_ab * 110), 1)) # [0, 1] + features_B = vggnet(fake_swap_rgb, layer_name='all') + # fea_loss = F.l1_loss(features_A[-1], features_B[-1]) * 0.1 + # fea_loss = 0 + + fea_loss1 = F.l1_loss(features_A[0], features_B[0]) / 32 * 0.1 + fea_loss2 = F.l1_loss(features_A[1], features_B[1]) / 16 * 0.1 + fea_loss3 = F.l1_loss(features_A[2], features_B[2]) / 8 * 0.1 + fea_loss4 = F.l1_loss(features_A[3], features_B[3]) / 4 * 0.1 + fea_loss5 = F.l1_loss(features_A[4], features_B[4]) * 0.1 + + fea_loss = fea_loss1 + fea_loss2 + fea_loss3 + fea_loss4 + fea_loss5 + + ## discriminator loss + real_img_rgb = img / 255. + img_ref_rgb = img_ref / 255. 
+ zero_ab_image = torch.zeros_like(fake_swap_ab) + input_img_rgb = tensor_lab2rgb(torch.cat((img_l * 50 + 50, zero_ab_image), 1)) # [0, 1] + + pred_fake = discriminator(fake_swap_rgb, input_img_rgb, img_ref_rgb) + disc_loss_GAN = criterion_GAN(pred_fake, valid) + disc_loss_GAN = disc_loss_GAN*0.01 + + loss_dict["recon"] = recon_loss + + loss_dict["fea"] = fea_loss + + loss_dict["disc_loss_GAN"] = disc_loss_GAN + + g_optim.zero_grad() + (recon_loss + fea_loss + disc_loss_GAN).backward() + g_optim.step() + + # --------------------- + # Train Discriminator + # --------------------- + # if the disc_loss_GAN<0.003, then start to train Discriminator + # if i%3 == 0: + + # Real loss + pred_real = discriminator(real_img_rgb, input_img_rgb, img_ref_rgb) + loss_real = criterion_GAN(pred_real, valid) + + # Fake loss + pred_fake = discriminator(fake_swap_rgb.detach(), input_img_rgb, img_ref_rgb) + loss_fake = criterion_GAN(pred_fake, fake) + + # Total loss + disc_loss = 0.5 * (loss_real + loss_fake) + + d_optim.zero_grad() + disc_loss.backward() + d_optim.step() + + # loss for discriminator itself + disc_val = disc_loss.mean().item() + disc_val_all += disc_val + count +=1 + + # -------------- + # Log Progress + # -------------- + + loss_reduced = reduce_loss_dict(loss_dict) + + recon_val = loss_reduced["recon"].mean().item() + recon_val_all += recon_val + # recon_val = 0 + fea_val = loss_reduced["fea"].mean().item() + fea_val_all += fea_val + # fea_val = 0 + + # loss for generator + disc_val_GAN = loss_reduced["disc_loss_GAN"].mean().item() + disc_val_GAN_all += disc_val_GAN + + + + if get_rank() == 0: + pbar.set_description( + ( + f"recon:{recon_val:.4f}; fea:{fea_val:.4f}; disc_GAN:{disc_val_GAN:.4f}; discriminator:{disc_val:.4f};" + ) + ) + + if i % 100 == 0: + if disc_val_all!=0: + disc_val_all = disc_val_all/count + print(f"recon_all:{recon_val_all / 100:.4f}; fea_all:{fea_val_all / 100:.4f}; disc_GAN_all:{disc_val_GAN_all / 100:.4f};discriminator:{disc_val_all:.4f};") + 
recon_val_all = 0 + fea_val_all = 0 + disc_val_GAN_all = 0 + disc_val_all = 0 + count = 0 + + # this code is for model validation, you should prepare you own val dataset and edit code to use it + # if i % 250 == 0: + # with torch.no_grad(): + # colorEncoder.eval() + # colorUNet.eval() + # + # imgsize = 256 + # for inum in range(15): + # val_img_path = 'test_datasets/val_Manga/in%d.jpg' % (inum + 1) + # val_ref_path = 'test_datasets/val_Manga/ref%d.jpg' % (inum + 1) + # # val_img_path = 'test_datasets/val_daytime/day_sample/in%d.jpg'%(inum+1) + # # val_ref_path = 'test_datasets/val_daytime/night_sample/dark4.jpg' + # out_name = 'in%d_ref%d.png' % (inum + 1, inum + 1) + # val_img = Image.open(val_img_path).convert("RGB").resize((imgsize, imgsize)) + # val_img_ref = Image.open(val_ref_path).convert("RGB").resize((imgsize, imgsize)) + # val_img, val_img_lab = preprocessing(val_img) + # val_img_ref, val_img_ref_lab = preprocessing(val_img_ref) + # + # # val_img = val_img.to(device) + # val_img_lab = val_img_lab.to(device) + # val_img_ref = val_img_ref.to(device) + # # val_img_ref_lab = val_img_ref_lab.to(device) + # + # val_img_l = val_img_lab[:, :1, :, :] / 50. # [-1, 1] + # # val_img_ref_ab = val_img_ref_lab[:,1:,:,:] / 110. # [-1, 1] + # + # ref_color_vector = colorEncoder(val_img_ref / 255.) 
# [0, 1] + # fake_swap_ab = colorUNet((val_img_l, ref_color_vector)) + # + # fake_img = torch.cat((val_img_l * 50, fake_swap_ab * 110), 1) + # + # sample = np.concatenate( + # (tensor2numpy(val_img), tensor2numpy(val_img_ref), Lab2RGB_out(fake_img)), 1) + # + # out_dir = 'training_logs/%s/%06d' % (args.experiment_name, i) + # mkdirss(out_dir) + # io.imsave('%s/%s' % (out_dir, out_name), sample.astype('uint8')) + # torch.cuda.empty_cache() + if i % 2000 == 0: + out_dir_g = "experiments/%s" % (args.experiment_name) + mkdirss(out_dir_g) + torch.save( + { + "colorEncoder": colorEncoder_module.state_dict(), + "colorUNet": colorUNet_module.state_dict(), + "g_optim": g_optim.state_dict(), + "args": args, + }, + f"%s/{str(i).zfill(6)}_gray.pt" % (out_dir_g), + ) + out_dir_d = "experiments/Discriminator" + mkdirss(out_dir_d) + torch.save( + { + "discriminator": discriminator.state_dict(), + "d_optim": d_optim.state_dict(), + "args": args, + }, + f"%s/{str(i).zfill(6)}_d.pt" % (out_dir_d), + ) + + +if __name__ == "__main__": + device = "cuda" + + torch.backends.cudnn.benchmark = True + + parser = argparse.ArgumentParser() + + parser.add_argument("--datasets", type=str) + parser.add_argument("--iter", type=int, default=100000) + parser.add_argument("--batch", type=int, default=16) + parser.add_argument("--size", type=int, default=256) + parser.add_argument("--ckpt", type=str, default=None) + parser.add_argument("--ckpt_disc", type=str, default=None) + parser.add_argument("--lr", type=float, default=0.0001) + parser.add_argument("--lr_disc", type=float, default=0.0002) + parser.add_argument("--experiment_name", type=str, default="default") + parser.add_argument("--wandb", action="store_true") + parser.add_argument("--local_rank", type=int, default=0) + + args = parser.parse_args() + + n_gpu = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 + args.distributed = n_gpu > 1 + + args.start_iter = 0 + + vggnet = 
vgg19(pretrained_path='./experiments/VGG19/vgg19-dcbb9e9d.pth', require_grad=False) + vggnet = vggnet.to(device) + vggnet.eval() + + colorEncoder = ColorEncoder(color_dim=512).to(device) + colorUNet = ColorUNet(bilinear=True).to(device) + discriminator = Discriminator(in_channels=3).to(device) + + g_optim = optim.Adam( + list(colorEncoder.parameters()) + list(colorUNet.parameters()), + lr=args.lr, + betas=(0.9, 0.99), + ) + + d_optim = optim.Adam( + discriminator.parameters(), + lr=args.lr_disc, + betas=(0.5, 0.999), + ) + + if args.ckpt is not None: + print("load model:", args.ckpt) + + ckpt = torch.load(args.ckpt, map_location=lambda storage, loc: storage) + + try: + ckpt_name = os.path.basename(args.ckpt) + match = re.search(r'\d+', ckpt_name) + if match: + args.start_iter = int(match.group(0)) + else: + args.start_iter = 0 + except ValueError: + pass + + colorEncoder.load_state_dict(ckpt["colorEncoder"]) + colorUNet.load_state_dict(ckpt["colorUNet"]) + g_optim.load_state_dict(ckpt["g_optim"]) + + if args.ckpt_disc is not None: + print("load discriminator model:", args.ckpt_disc) + + ckpt_disc = torch.load(args.ckpt_disc, map_location=lambda storage, loc: storage) + discriminator.load_state_dict(ckpt_disc["discriminator"]) + d_optim.load_state_dict(ckpt_disc["d_optim"]) + # print(args.distributed) + + transform = transforms.Compose( + [ + transforms.RandomHorizontalFlip(), + # transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(-90, 90)) + ] + ) + + datasets = [] + dataset = MultiResolutionDataset(args.datasets, transform, args.size) + datasets.append(dataset) + + loader = data.DataLoader( + data.ConcatDataset(datasets), + batch_size=args.batch, + sampler=data_sampler(dataset, shuffle=True, distributed=args.distributed), + drop_last=True, + ) + + train( + args, + loader, + colorEncoder, + colorUNet, + discriminator, + vggnet, + g_optim, + d_optim, + device, + ) diff --git a/train_all_sketch.py b/train_all_sketch.py new file mode 100644 index 
# Head of train_all_sketch.py: imports plus the small helpers used by the
# training code that follows in the same file.
import argparse

import os
import re

import numpy as np
from PIL import Image
from skimage import color, io
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils import data
from torchvision import transforms
from tqdm import tqdm
from torch.autograd import Variable

# from ColorEncoder import ColorEncoder
from models import ColorEncoder, ColorUNet
from vgg_model import vgg19
from discriminator import Discriminator
# from data.data_loader import MultiResolutionDataset
from data.data_loader_sketch import MultiResolutionDataset

from utils import tensor_lab2rgb

from distributed import (
    get_rank,
    synchronize,
    reduce_loss_dict,
)


def mkdirss(dirpath):
    """Create ``dirpath`` (and any missing parents) if it is not there yet.

    ``exist_ok=True`` replaces the former ``os.path.exists`` check followed by
    ``os.makedirs``: with that pair, a second process creating the directory
    between the check and the call made ``makedirs`` raise FileExistsError.

    Raises:
        FileExistsError: if ``dirpath`` exists but is not a directory (the old
            code silently returned in that case and failed later on write).
    """
    os.makedirs(dirpath, exist_ok=True)


def data_sampler(dataset, shuffle, distributed):
    """Choose the sampler that is handed to the DataLoader.

    Returns a ``DistributedSampler`` (with ``shuffle`` forwarded) when
    ``distributed`` is true; otherwise a ``RandomSampler`` when ``shuffle`` is
    true and a ``SequentialSampler`` when it is not.
    """
    if distributed:
        return data.distributed.DistributedSampler(dataset, shuffle=shuffle)
    if shuffle:
        return data.RandomSampler(dataset)
    return data.SequentialSampler(dataset)


def requires_grad(model, flag=True):
    """Set ``requires_grad`` to ``flag`` on every parameter of ``model``."""
    for p in model.parameters():
        p.requires_grad = flag


def sample_data(loader):
    """Yield batches from ``loader`` endlessly, restarting it when exhausted.

    Before each pass, a sampler that exposes ``set_epoch`` (as
    ``DistributedSampler`` does) is told the pass number; without that call
    it reuses the same shuffle order on every pass. The first pass uses
    epoch 0, which is the sampler's default, so it is unchanged.

    Raises:
        ValueError: if a full pass over ``loader`` produces no batch. The
            previous version spun forever in that situation (e.g. a dataset
            smaller than one batch combined with ``drop_last=True``).
    """
    set_epoch = getattr(getattr(loader, "sampler", None), "set_epoch", None)
    epoch = 0
    while True:
        if set_epoch is not None:
            set_epoch(epoch)
        produced = False
        for batch in loader:
            produced = True
            yield batch
        if not produced:
            raise ValueError(
                "sample_data: loader yielded no batches "
                "(dataset smaller than one batch with drop_last=True?)"
            )
        epoch += 1


def Lab2RGB_out(img_lab):
    """Convert the first image of a Lab batch into an RGB uint8 array.

    The tensor is indexed as (batch, channel, height, width). Channel 0 is
    taken as L with 50 subtracted (the offset ``Normalize`` in this file
    applies) and the remaining channels as ab. Only batch item 0 is converted.

    Returns:
        Channel-last numpy ``uint8`` array: skimage's ``lab2rgb`` output
        clipped to [0, 1] and scaled by 255 (truncated, not rounded).
    """
    img_lab = img_lab.detach().cpu()
    img_l = img_lab[:, :1, :, :] + 50  # undo the -50 centring of L
    img_ab = img_lab[:, 1:, :, :]
    pred_lab = torch.cat((img_l, img_ab), 1)[0, ...].numpy()
    rgb = color.lab2rgb(pred_lab.transpose(1, 2, 0))
    return (np.clip(rgb, 0, 1) * 255).astype("uint8")


def RGB2Lab(inputs):
    """Convert an RGB image to CIE Lab via ``skimage.color.rgb2lab``.

    The original note describes the input as uint8 in [0, 255] and the
    nominal output ranges as L in [0, 100] and ab in about [-110, 110].
    NOTE(review): that note also claimed a float32 result; the dtype is
    whatever skimage returns (presumably float64 for uint8 input - confirm).
    ``Normalize`` is the step that casts to float32.
    """
    return color.rgb2lab(inputs)
+ +def Normalize(inputs): + l = inputs[:, :, 0:1] + ab = inputs[:, :, 1:3] + l = l - 50 + lab = np.concatenate((l, ab), 2) + + return lab.astype('float32') + + +def numpy2tensor(inputs): + out = torch.from_numpy(inputs.transpose(2, 0, 1)) + return out + + +def tensor2numpy(inputs): + out = inputs[0, ...].detach().cpu().numpy().transpose(1, 2, 0) + return out + + +def preprocessing(inputs): + # input: rgb, [0, 255], uint8 + img_lab = Normalize(RGB2Lab(inputs)) + img = np.array(inputs, 'float32') # [0, 255] + img = numpy2tensor(img) + img_lab = numpy2tensor(img_lab) + return img.unsqueeze(0), img_lab.unsqueeze(0) + + +def uncenter_l(inputs): + l = inputs[:, :1, :, :] + 50 + ab = inputs[:, 1:, :, :] + return torch.cat((l, ab), 1) + + +def train( + args, + loader, + colorEncoder, + colorUNet, + discriminator, + vggnet, + g_optim, + d_optim, + device, +): + loader = sample_data(loader) + + pbar = range(args.iter) + + if get_rank() == 0: + pbar = tqdm(pbar, initial=args.start_iter, dynamic_ncols=True, smoothing=0.01) + + g_loss_val = 0 + loss_dict = {} + recon_val_all = 0 + fea_val_all = 0 + disc_val_all = 0 + disc_val_GAN_all = 0 + disc_val = 0 + count = 0 + criterion_GAN = torch.nn.MSELoss().to(device) + + # Calculate output of image discriminator (PatchGAN) + patch = (1, args.size // 2 ** 4, args.size // 2 ** 4) + Tensor = torch.cuda.FloatTensor if device == 'cuda' else torch.FloatTensor + + colorEncoder_module = colorEncoder + colorUNet_module = colorUNet + + for idx in pbar: + i = idx + args.start_iter + 1 + + if i > args.iter: + print("Done!") + + break + + # img, img_ref, img_lab = next(loader) + img, img_ref, img_lab, img_lab_sketch = next(loader) + + # Adversarial ground truths + valid = Variable(Tensor(np.ones((img.size(0), *patch))), requires_grad=False) + fake = Variable(Tensor(np.zeros((img.size(0), *patch))), requires_grad=False) + # ima = img_ref.numpy() + # ima = ima[0].astype('uint8') + # ima = Image.fromarray(ima.transpose(1,2,0)) + # ima.show() + + img 
= img.to(device) # GT [B, 3, 256, 256] + img_lab = img_lab.to(device) # GT + img_lab_sketch = img_lab_sketch.to(device) + + img_ref = img_ref.to(device) # tps_transformed image RGB [B, 3, 256, 256] + + img_l = img_lab_sketch[:, :1, :, :] / 50 # [-1, 1] target L + img_ab = img_lab[:, 1:, :, :] / 110 # [-1, 1] target ab + # img_ref_ab = img_ref_lab[:,1:,:,:] / 110 # [-1, 1] ref ab + + colorEncoder.train() + colorUNet.train() + discriminator.train() + + requires_grad(colorEncoder, True) + requires_grad(colorUNet, True) + requires_grad(discriminator, True) + + # ------------------ + # Train Generators + # ------------------ + + ref_color_vector = colorEncoder(img_ref / 255.) + + fake_swap_ab = colorUNet((img_l, ref_color_vector)) # [-1, 1] + + ## recon l1 loss + recon_loss = (F.smooth_l1_loss(fake_swap_ab, img_ab)) + + ## feature loss + real_img_rgb = img / 255. + features_A = vggnet(real_img_rgb, layer_name='all') + + fake_swap_rgb = tensor_lab2rgb(torch.cat((img_l * 50 + 50, fake_swap_ab * 110), 1)) # [0, 1] + features_B = vggnet(fake_swap_rgb, layer_name='all') + # fea_loss = F.l1_loss(features_A[-1], features_B[-1]) * 0.1 + # fea_loss = 0 + + fea_loss1 = F.l1_loss(features_A[0], features_B[0]) / 32 * 0.1 + fea_loss2 = F.l1_loss(features_A[1], features_B[1]) / 16 * 0.1 + fea_loss3 = F.l1_loss(features_A[2], features_B[2]) / 8 * 0.1 + fea_loss4 = F.l1_loss(features_A[3], features_B[3]) / 4 * 0.1 + fea_loss5 = F.l1_loss(features_A[4], features_B[4]) * 0.1 + + fea_loss = fea_loss1 + fea_loss2 + fea_loss3 + fea_loss4 + fea_loss5 + + ## discriminator loss + real_img_rgb = img / 255. + img_ref_rgb = img_ref / 255. 
+ zero_ab_image = torch.zeros_like(fake_swap_ab) + input_img_rgb = tensor_lab2rgb(torch.cat((img_l * 50 + 50, zero_ab_image), 1)) # [0, 1] + + # ima = input_img_rgb.cpu() + # ima = ima.numpy()*255 + # ima = ima[0].astype('uint8') + # ima = Image.fromarray(ima.transpose(1,2,0)) + # ima.show() + + pred_fake = discriminator(fake_swap_rgb, input_img_rgb, img_ref_rgb) + disc_loss_GAN = criterion_GAN(pred_fake, valid) + disc_loss_GAN = disc_loss_GAN * 0.01 + + loss_dict["recon"] = recon_loss + + loss_dict["fea"] = fea_loss + + loss_dict["disc_loss_GAN"] = disc_loss_GAN + + g_optim.zero_grad() + (recon_loss + fea_loss + disc_loss_GAN).backward() + g_optim.step() + + # --------------------- + # Train Discriminator + # --------------------- + # if the disc_loss_GAN<0.003, then start to train Discriminator + if i % 35 == 0: + # Real loss + pred_real = discriminator(real_img_rgb, input_img_rgb, img_ref_rgb) + loss_real = criterion_GAN(pred_real, valid) + + # Fake loss + pred_fake = discriminator(fake_swap_rgb.detach(), input_img_rgb, img_ref_rgb) + loss_fake = criterion_GAN(pred_fake, fake) + + # Total loss + disc_loss = 0.5 * (loss_real + loss_fake) + + d_optim.zero_grad() + disc_loss.backward() + d_optim.step() + + # loss for discriminator itself + disc_val = disc_loss.mean().item() + disc_val_all += disc_val + count += 1 + + # -------------- + # Log Progress + # -------------- + + loss_reduced = reduce_loss_dict(loss_dict) + + recon_val = loss_reduced["recon"].mean().item() + recon_val_all += recon_val + # recon_val = 0 + fea_val = loss_reduced["fea"].mean().item() + fea_val_all += fea_val + # fea_val = 0 + + # loss for generator + disc_val_GAN = loss_reduced["disc_loss_GAN"].mean().item() + disc_val_GAN_all += disc_val_GAN + + if get_rank() == 0: + pbar.set_description( + ( + f"recon:{recon_val:.4f}; fea:{fea_val:.4f}; disc_GAN:{disc_val_GAN:.4f}; discriminator:{disc_val:.4f};" + ) + ) + + if i % 100 == 0: + if disc_val_all != 0: + disc_val_all = disc_val_all / count + 
print( + f"recon_all:{recon_val_all / 100:.4f}; fea_all:{fea_val_all / 100:.4f}; disc_GAN_all:{disc_val_GAN_all / 100:.4f};discriminator:{disc_val_all:.4f};") + recon_val_all = 0 + fea_val_all = 0 + disc_val_GAN_all = 0 + disc_val_all = 0 + count = 0 + + # this code is for model validation, you should prepare you own val dataset and edit code to use it + # if i % 250 == 0: + # with torch.no_grad(): + # colorEncoder.eval() + # colorUNet.eval() + # + # imgsize = 256 + # for inum in range(12): + # val_img_path = 'test_datasets/val_Sketch/in%d.jpg' % (inum + 1) + # val_ref_path = 'test_datasets/val_Sketch/ref%d.jpg' % (inum + 1) + # # val_img_path = 'test_datasets/val_daytime/day_sample/in%d.jpg'%(inum+1) + # # val_ref_path = 'test_datasets/val_daytime/night_sample/dark4.jpg' + # out_name = 'in%d_ref%d.png' % (inum + 1, inum + 1) + # val_img = Image.open(val_img_path).convert("RGB").resize((imgsize, imgsize)) + # val_img_ref = Image.open(val_ref_path).convert("RGB").resize((imgsize, imgsize)) + # val_img, val_img_lab = preprocessing(val_img) + # val_img_ref, val_img_ref_lab = preprocessing(val_img_ref) + # + # # val_img = val_img.to(device) + # val_img_lab = val_img_lab.to(device) + # val_img_ref = val_img_ref.to(device) + # # val_img_ref_lab = val_img_ref_lab.to(device) + # + # val_img_l = val_img_lab[:, :1, :, :] / 50. # [-1, 1] + # # val_img_ref_ab = val_img_ref_lab[:,1:,:,:] / 110. # [-1, 1] + # + # ref_color_vector = colorEncoder(val_img_ref / 255.) 
# [0, 1] + # fake_swap_ab = colorUNet((val_img_l, ref_color_vector)) + # + # fake_img = torch.cat((val_img_l * 50, fake_swap_ab * 110), 1) + # + # sample = np.concatenate( + # (tensor2numpy(val_img), tensor2numpy(val_img_ref), Lab2RGB_out(fake_img)), 1) + # + # out_dir = 'training_logs/%s/%06d' % (args.experiment_name, i) + # mkdirss(out_dir) + # io.imsave('%s/%s' % (out_dir, out_name), sample.astype('uint8')) + # torch.cuda.empty_cache() + if i % 2000 == 0: + out_dir_g = "experiments/%s" % (args.experiment_name) + mkdirss(out_dir_g) + torch.save( + { + "colorEncoder": colorEncoder_module.state_dict(), + "colorUNet": colorUNet_module.state_dict(), + "g_optim": g_optim.state_dict(), + "args": args, + }, + f"%s/{str(i).zfill(6)}_sketch.pt" % (out_dir_g), + ) + out_dir_d = "experiments/Discriminator" + mkdirss(out_dir_d) + torch.save( + { + "discriminator": discriminator.state_dict(), + "d_optim": d_optim.state_dict(), + "args": args, + }, + f"%s/{str(i).zfill(6)}_d.pt" % (out_dir_d), + ) + + +if __name__ == "__main__": + device = "cuda" + + torch.backends.cudnn.benchmark = True + + parser = argparse.ArgumentParser() + + parser.add_argument("--datasets", type=str) + parser.add_argument("--iter", type=int, default=200000) + parser.add_argument("--batch", type=int, default=16) + parser.add_argument("--size", type=int, default=256) + parser.add_argument("--ckpt", type=str, default=None) + parser.add_argument("--ckpt_disc", type=str, default=None) + parser.add_argument("--lr", type=float, default=0.0001) + parser.add_argument("--lr_disc", type=float, default=0.0002) + parser.add_argument("--experiment_name", type=str, default="default") + parser.add_argument("--wandb", action="store_true") + parser.add_argument("--local_rank", type=int, default=0) + + args = parser.parse_args() + + n_gpu = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 + args.distributed = n_gpu > 1 + + args.start_iter = 0 + + vggnet = 
vgg19(pretrained_path='./experiments/VGG19/vgg19-dcbb9e9d.pth', require_grad=False) + vggnet = vggnet.to(device) + vggnet.eval() + + colorEncoder = ColorEncoder(color_dim=512).to(device) + colorUNet = ColorUNet(bilinear=True).to(device) + discriminator = Discriminator(in_channels=3).to(device) + + g_optim = optim.Adam( + list(colorEncoder.parameters()) + list(colorUNet.parameters()), + lr=args.lr, + betas=(0.9, 0.99), + ) + + d_optim = optim.Adam( + discriminator.parameters(), + lr=args.lr_disc, + betas=(0.5, 0.999), + ) + + if args.ckpt is not None: + print("load model:", args.ckpt) + + ckpt = torch.load(args.ckpt, map_location=lambda storage, loc: storage) + + try: + ckpt_name = os.path.basename(args.ckpt) + match = re.search(r'\d+', ckpt_name) + if match: + args.start_iter = int(match.group(0)) + else: + args.start_iter = 0 + except ValueError: + pass + + colorEncoder.load_state_dict(ckpt["colorEncoder"]) + colorUNet.load_state_dict(ckpt["colorUNet"]) + g_optim.load_state_dict(ckpt["g_optim"]) + + if args.ckpt_disc is not None: + print("load discriminator model:", args.ckpt_disc) + + ckpt_disc = torch.load(args.ckpt_disc, map_location=lambda storage, loc: storage) + discriminator.load_state_dict(ckpt_disc["discriminator"]) + d_optim.load_state_dict(ckpt_disc["d_optim"]) + # print(args.distributed) + + transform = transforms.Compose( + [ + transforms.RandomHorizontalFlip(), + # transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(-90, 90)) + ] + ) + + datasets = [] + dataset = MultiResolutionDataset(args.datasets, transform, args.size) + datasets.append(dataset) + + loader = data.DataLoader( + data.ConcatDataset(datasets), + batch_size=args.batch, + sampler=data_sampler(dataset, shuffle=True, distributed=args.distributed), + drop_last=True, + ) + + train( + args, + loader, + colorEncoder, + colorUNet, + discriminator, + vggnet, + g_optim, + d_optim, + device, + ) diff --git a/train_disc.py b/train_disc.py new file mode 100644 index 
0000000000000000000000000000000000000000..4281466f964b04397d95f5ac4210e784c7a11f4a --- /dev/null +++ b/train_disc.py @@ -0,0 +1,337 @@ +import argparse + +import os + +import numpy as np +from PIL import Image +from skimage import color, io +import torch +from torch import nn, optim +from torch.nn import functional as F +from torch.utils import data +from torchvision import transforms +from tqdm import tqdm +from torch.autograd import Variable + +# from ColorEncoder import ColorEncoder +from models import ColorEncoder, ColorUNet +from discriminator import Discriminator +from data.data_loader import MultiResolutionDataset + +from utils import tensor_lab2rgb + +from distributed import ( + get_rank, + synchronize, + reduce_loss_dict, +) + + +def mkdirss(dirpath): + if not os.path.exists(dirpath): + os.makedirs(dirpath) + + +def data_sampler(dataset, shuffle, distributed): + if distributed: + return data.distributed.DistributedSampler(dataset, shuffle=shuffle) + + if shuffle: + return data.RandomSampler(dataset) + + else: + return data.SequentialSampler(dataset) + + +def requires_grad(model, flag=True): + for p in model.parameters(): + p.requires_grad = flag + + +def sample_data(loader): + while True: + for batch in loader: + yield batch + + +def Lab2RGB_out(img_lab): + img_lab = img_lab.detach().cpu() + img_l = img_lab[:, :1, :, :] + img_ab = img_lab[:, 1:, :, :] + # print(torch.max(img_l), torch.min(img_l)) + # print(torch.max(img_ab), torch.min(img_ab)) + img_l = img_l + 50 + pred_lab = torch.cat((img_l, img_ab), 1)[0, ...].numpy() + # grid_lab = utils.make_grid(pred_lab, nrow=1).numpy().astype("float64") + # print(grid_lab.shape) + out = (np.clip(color.lab2rgb(pred_lab.transpose(1, 2, 0)), 0, 1) * 255).astype("uint8") + return out + + +def RGB2Lab(inputs): + # input [0, 255] uint8 + # out l: [0, 100], ab: [-110, 110], float32 + return color.rgb2lab(inputs) + + +def Normalize(inputs): + l = inputs[:, :, 0:1] + ab = inputs[:, :, 1:3] + l = l - 50 + lab = 
def Normalize(inputs):
    """Centre the first channel of an H x W x 3 Lab array by subtracting 50.

    Args:
        inputs: Array indexed as [row, col, channel] with at least 3 channels.

    Returns:
        ``float32`` array with channel 0 shifted by -50 and channels 1-2
        unchanged.
    """
    lightness = inputs[:, :, 0:1] - 50
    chroma = inputs[:, :, 1:3]
    return np.concatenate((lightness, chroma), 2).astype('float32')


def numpy2tensor(inputs):
    """Turn an H x W x C array into a C x H x W tensor (memory is shared)."""
    return torch.from_numpy(inputs.transpose(2, 0, 1))


def tensor2numpy(inputs):
    """Return the first batch element of ``inputs`` as an H x W x C array."""
    first = inputs[0, ...].detach().cpu().numpy()
    return first.transpose(1, 2, 0)


def preprocessing(inputs):
    """Build the RGB and centred-Lab tensors for one image.

    Args:
        inputs: RGB image array in H x W x C layout.

    Returns:
        Tuple ``(img, img_lab)`` of tensors with a leading batch axis of 1:
        the float32 copy of ``inputs`` and its ``Normalize(RGB2Lab(...))``
        counterpart.
    """
    img_lab = numpy2tensor(Normalize(RGB2Lab(inputs)))
    img = numpy2tensor(np.array(inputs, 'float32'))
    return img.unsqueeze(0), img_lab.unsqueeze(0)


def uncenter_l(inputs):
    """Add 50 back to channel 0 of a [batch, channel, row, col] tensor."""
    lightness = inputs[:, :1, :, :] + 50
    chroma = inputs[:, 1:, :, :]
    return torch.cat((lightness, chroma), 1)


def train(
        args,
        loader,
        colorEncoder,
        colorUNet,
        discriminator,
        d_optim,
        device,
):
    """Train only the discriminator against a frozen colour encoder / U-Net.

    The generator networks run in eval mode without gradients; each step
    scores the ground-truth image and the generated colourisation with a
    least-squares (MSE) loss and updates ``discriminator`` through ``d_optim``.

    Args:
        args: Namespace providing ``iter``, ``start_iter``, ``size`` and
            ``experiment_name``.
        loader: Iterable yielding ``(img, img_ref, img_lab)`` batches.
        colorEncoder: Frozen network mapping the reference image to a colour
            vector.
        colorUNet: Frozen network predicting ab channels from ``(L, vector)``.
        discriminator: Network being trained; called with
            ``(image, gray_image, reference)``.
        d_optim: Optimiser for ``discriminator``.
        device: Device (string or ``torch.device``) the batches are moved to.
    """
    loader = sample_data(loader)

    is_main = get_rank() == 0
    pbar = range(args.iter)
    if is_main:
        pbar = tqdm(pbar, initial=args.start_iter, dynamic_ncols=True, smoothing=0.01)

    # Loss accumulated since the last report, and the number of steps in it,
    # so that the printed average is correct even for a partial first window.
    running_loss = 0.0
    running_steps = 0
    criterion_GAN = torch.nn.MSELoss().to(device)

    # Label shape per sample; the discriminator output is expected to be
    # 1 x size/16 x size/16 (PatchGAN).
    patch = (1, args.size // 2 ** 4, args.size // 2 ** 4)

    for idx in pbar:
        i = idx + args.start_iter

        if i > args.iter:
            print("Done!")

            break

        img, img_ref, img_lab = next(loader)

        # Adversarial ground truths, created directly on ``device`` so this
        # works for "cuda", "cuda:0", "cpu" and torch.device objects alike.
        batch_size = img.size(0)
        valid = torch.ones((batch_size, *patch), device=device)
        fake = torch.zeros((batch_size, *patch), device=device)

        img = img.to(device)  # ground-truth RGB, divided by 255 below
        img_lab = img_lab.to(device)  # ground-truth Lab
        img_ref = img_ref.to(device)  # reference RGB, divided by 255 below

        img_l = img_lab[:, :1, :, :] / 50  # target L, rescaled

        colorEncoder.eval()
        colorUNet.eval()
        discriminator.train()

        requires_grad(colorEncoder, False)
        requires_grad(colorUNet, False)
        requires_grad(discriminator, True)

        with torch.no_grad():
            ref_color_vector = colorEncoder(img_ref / 255.)
            fake_swap_ab = colorUNet((img_l, ref_color_vector))

        lightness = img_l * 50 + 50
        fake_swap_rgb = tensor_lab2rgb(torch.cat((lightness, fake_swap_ab * 110), 1))
        real_img_rgb = img / 255.
        img_ref_rgb = img_ref / 255.

        # Gray conditioning image: same L channel with zeroed ab channels.
        zero_ab_image = torch.zeros_like(fake_swap_ab)
        input_img_rgb = tensor_lab2rgb(torch.cat((lightness, zero_ab_image), 1))

        # Real loss
        pred_real = discriminator(real_img_rgb, input_img_rgb, img_ref_rgb)
        loss_real = criterion_GAN(pred_real, valid)

        # Fake loss
        pred_fake = discriminator(fake_swap_rgb.detach(), input_img_rgb, img_ref_rgb)
        loss_fake = criterion_GAN(pred_fake, fake)

        # Total loss
        disc_loss = 0.5 * (loss_real + loss_fake)

        d_optim.zero_grad()
        disc_loss.backward()
        d_optim.step()

        disc_val = disc_loss.item()
        running_loss += disc_val
        running_steps += 1

        # NOTE(review): logging and checkpointing are assumed to run on rank 0
        # only; the original nesting was not recoverable -- confirm.
        if is_main:
            pbar.set_description(
                (
                    f"discriminator:{disc_val:.4f};"
                )
            )

            if i % 100 == 0:
                print(f"discriminator:{running_loss / running_steps:.4f};")
                running_loss = 0.0
                running_steps = 0
            if i % 1000 == 0:
                out_dir = f"experiments/{args.experiment_name}"
                mkdirss(out_dir)
                torch.save(
                    {
                        "discriminator": discriminator.state_dict(),
                        "d_optim": d_optim.state_dict(),
                        "args": args,
                    },
                    f"{out_dir}/{str(i).zfill(6)}_ds.pt",
                )
+ parser.add_argument("--size", type=int, default=256) + parser.add_argument("--ckpt", type=str, default=None) + parser.add_argument("--ckpt_disc", type=str, default=None) + parser.add_argument("--lr", type=float, default=0.0002) + parser.add_argument("--experiment_name", type=str, default="default") + parser.add_argument("--wandb", action="store_true") + parser.add_argument("--local_rank", type=int, default=0) + + args = parser.parse_args() + + n_gpu = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 + args.distributed = n_gpu > 1 + + if args.distributed: + torch.cuda.set_device(args.local_rank) + torch.distributed.init_process_group(backend="nccl", init_method="env://") + synchronize() + + args.start_iter = 0 + + colorEncoder = ColorEncoder(color_dim=512).to(device) + colorUNet = ColorUNet(bilinear=True).to(device) + discriminator = Discriminator(in_channels=3).to(device) + + d_optim = optim.Adam( + discriminator.parameters(), + lr=args.lr, + betas=(0.5, 0.999), + ) + + if args.ckpt is not None: + print("load model:", args.ckpt) + + ckpt = torch.load(args.ckpt, map_location=lambda storage, loc: storage) + + colorEncoder.load_state_dict(ckpt["colorEncoder"]) + colorUNet.load_state_dict(ckpt["colorUNet"]) + + if args.ckpt_disc is not None: + print("load discriminator model:", args.ckpt_disc) + + ckpt_disc = torch.load(args.ckpt_disc, map_location=lambda storage, loc: storage) + + try: + ckpt_name = os.path.basename(args.ckpt_disc) + args.start_iter = int(os.path.splitext(ckpt_name)[0]) + + except ValueError: + pass + + discriminator.load_state_dict(ckpt_disc["discriminator"]) + d_optim.load_state_dict(ckpt_disc["d_optim"]) + + # print(args.distributed) + + if args.distributed: + colorEncoder = nn.parallel.DistributedDataParallel( + colorEncoder, + device_ids=[args.local_rank], + output_device=args.local_rank, + broadcast_buffers=False, + ) + + colorUNet = nn.parallel.DistributedDataParallel( + colorUNet, + device_ids=[args.local_rank], + 
xyz_from_rgb = np.array(
    [[0.412453, 0.357580, 0.180423], [0.212671, 0.715160, 0.072169], [0.019334, 0.119193, 0.950227]]
)
rgb_from_xyz = np.array(
    [[3.24048134, -0.96925495, 0.05564664], [-1.53715152, 1.87599, -0.20404134], [-0.49853633, 0.04155593, 1.05731107]]
)


def tensor_lab2rgb(input):
    """Convert a batch of Lab images to RGB clipped to [0, 1].

    Args:
        input: Tensor in ``n x 3 x h x w`` layout holding L, a and b channels.

    Returns:
        Tensor in ``n x 3 x h x w`` layout with values clipped to [0, 1].
        ``input`` is not modified.
    """
    lab = input.permute(0, 2, 3, 1)  # n x h x w x 3
    L, a, b = lab[..., 0:1], lab[..., 1:2], lab[..., 2:]

    y = (L + 16.0) / 116.0
    x = (a / 500.0) + y
    z = torch.clamp(y - (b / 200.0), min=0)
    xyz = torch.cat((x, y, z), dim=3)

    # Piecewise inverse of the Lab companding function, selected element-wise
    # with torch.where instead of boolean-mask writes (no host sync on GPU).
    xyz = torch.where(
        xyz > 0.2068966,
        torch.pow(xyz, 3.0),
        (xyz - 16.0 / 116.0) / 7.787,
    )
    # Scale x and z by the reference white; y is left as is (factor 1).
    xyz = xyz * xyz.new_tensor([0.95047, 1.0, 1.08883])

    to_rgb = torch.from_numpy(rgb_from_xyz).type_as(xyz)
    rgb = torch.mm(xyz.reshape(-1, 3), to_rgb).view(
        input.size(0), input.size(2), input.size(3), 3
    )

    # Gamma encoding. The base of the power is clamped to the threshold so the
    # unselected branch never sees a negative value (which would put NaN into
    # the gradient); selected elements are above the threshold and unaffected.
    encoded = 1.055 * torch.pow(rgb.clamp(min=0.0031308), 1 / 2.4) - 0.055
    rgb = torch.where(rgb > 0.0031308, encoded, rgb * 12.92)

    return rgb.clamp(0, 1).permute(0, 3, 1, 2).contiguous()


def get_files(img_dir):
    """Return ``(images, masks, ground_truths)`` path lists found under ``img_dir``."""
    imgs, masks, xmls = list_files(img_dir)
    return imgs, masks, xmls


_IMAGE_EXTS = frozenset(('.jpg', '.jpeg', '.gif', '.png', '.pgm'))
_MASK_EXTS = frozenset(('.bmp',))
_GT_EXTS = frozenset(('.xml', '.gt', '.txt'))


def list_files(in_path):
    """Walk ``in_path`` recursively and sort files into three lists by extension.

    Extensions are compared case-insensitively. Files with any other
    extension are ignored.

    Args:
        in_path: Root directory to walk.

    Returns:
        Tuple ``(img_files, mask_files, gt_files)`` of path lists, in the
        order ``os.walk`` reports them.
    """
    img_files = []
    mask_files = []
    gt_files = []
    for dirpath, _dirnames, filenames in os.walk(in_path):
        for file in filenames:
            ext = os.path.splitext(file)[1].lower()
            path = os.path.join(dirpath, file)
            if ext in _IMAGE_EXTS:
                img_files.append(path)
            elif ext in _MASK_EXTS:
                mask_files.append(path)
            elif ext in _GT_EXTS:
                gt_files.append(path)
    return img_files, mask_files, gt_files


def load_image(img_file):
    """Read an image file and return it as a 3-channel array.

    Args:
        img_file: Path passed to ``io.imread``.

    Returns:
        Array with three channels in the last axis.
    """
    img = io.imread(img_file)  # RGB order
    # NOTE(review): a leading axis of length 2 is treated as two stacked
    # frames and only the first is kept -- presumably for multi-frame files;
    # this would also match an image that is 2 pixels high. Confirm.
    if img.shape[0] == 2:
        img = img[0]
    if len(img.shape) == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    if img.shape[2] == 4:
        img = img[:, :, :3]  # drop the fourth channel
    img = np.array(img)

    return img
# Names of the entries of ``vgg_model.features`` in execution order.
_VGG19_LAYER_NAMES = (
    'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
    'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
    'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
    'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
    'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4', 'pool5',
)
_VGG19_LAYER_INDEX = {name: idx for idx, name in enumerate(_VGG19_LAYER_NAMES)}
# Layers returned, in this order, when ``layer_name == 'all'``.
_VGG19_ALL_LAYERS = ('relu1_2', 'relu2_2', 'relu3_2', 'relu4_2', 'relu5_2')


def vgg_preprocess(tensor):
    """Normalise an RGB tensor in [0, 1] with per-channel mean and std.

    Args:
        tensor: Tensor whose channel axis (length 3) is third from the end.

    Returns:
        ``(tensor - mean) / std`` with mean ``(0.485, 0.456, 0.406)`` and
        std ``(0.229, 0.224, 0.225)``.
    """
    mean_val = torch.tensor([0.485, 0.456, 0.406]).type_as(tensor).view(-1, 1, 1)
    std_val = torch.tensor([0.229, 0.224, 0.225]).type_as(tensor).view(-1, 1, 1)
    return (tensor - mean_val) / std_val


class vgg19(nn.Module):
    """VGG19 feature extractor exposing intermediate activations by name."""

    def __init__(self, pretrained_path = './experiments/VGG19/vgg19-dcbb9e9d.pth', require_grad = False):
        """Build the network and optionally load weights.

        Args:
            pretrained_path: State-dict file to load, or ``None`` to keep the
                freshly constructed weights.
            require_grad: When false, every parameter is frozen.
        """
        super(vgg19, self).__init__()
        self.vgg_model = models.vgg19()
        if pretrained_path is not None:
            print('----load pretrained vgg19----')
            # Load onto CPU first so the file can be read on any machine;
            # load_state_dict copies the values into the model's own tensors.
            self.vgg_model.load_state_dict(torch.load(pretrained_path, map_location='cpu'))
            print('----load done!----')
        self.vgg_feature = self.vgg_model.features
        # One single-layer Sequential per feature layer, indexable by position.
        self.seq_list = [nn.Sequential(ele) for ele in self.vgg_feature]

        if not require_grad:
            for parameter in self.parameters():
                parameter.requires_grad = False

    def forward(self, x, layer_name='relu5_2'):
        """Run the feature stack and return the requested activation(s).

        Args:
            x: RGB input in [0, 1].
            layer_name: Any name in ``_VGG19_LAYER_NAMES`` (a one-element list
                is returned) or ``'all'`` (returns relu1_2, relu2_2, relu3_2,
                relu4_2 and relu5_2).

        Returns:
            List of activation tensors.

        Raises:
            ValueError: If ``layer_name`` is not recognised.
        """
        if layer_name == 'all':
            wanted = _VGG19_ALL_LAYERS
        elif layer_name in _VGG19_LAYER_INDEX:
            wanted = (layer_name,)
        else:
            raise ValueError(
                f"unknown layer_name {layer_name!r}; expected 'all' or one of {_VGG19_LAYER_NAMES}"
            )

        x = vgg_preprocess(x)

        # Every layer is executed, whatever was requested.
        # NOTE(review): if the ReLU layers operate in place (torchvision's VGG
        # is believed to build them with inplace=True -- confirm), a 'convX_Y'
        # tensor aliases the output of the ReLU that follows it, so stopping
        # early would change what 'convX_Y' returns.
        activations = []
        for layer in self.seq_list:
            x = layer(x)
            activations.append(x)

        return [activations[_VGG19_LAYER_INDEX[name]] for name in wanted]


class vgg19_class_fea(nn.Module):
    """VGG19 returning both the classifier output and the relu5_2 features."""

    def __init__(self, pretrained_path = './experiments/vgg19-dcbb9e9d.pth', require_grad = False):
        """Build the network and load weights from ``pretrained_path``.

        Args:
            pretrained_path: State-dict file to load.
            require_grad: When false, every parameter is frozen.
        """
        super(vgg19_class_fea, self).__init__()
        self.vgg_model = models.vgg19()
        print('----load pretrained vgg19----')
        self.vgg_model.load_state_dict(torch.load(pretrained_path, map_location='cpu'))
        print('----load done!----')
        self.vgg_feature = self.vgg_model.features
        self.avgpool = self.vgg_model.avgpool
        self.classifier = self.vgg_model.classifier

        self.seq_list = [nn.Sequential(ele) for ele in self.vgg_feature]  # 37 layers
        if not require_grad:
            for parameter in self.parameters():
                parameter.requires_grad = False

    def forward(self, x):
        """Return ``(class_scores, relu5_2)`` for an RGB input in [0, 1]."""
        x = vgg_preprocess(x)

        for i, layer in enumerate(self.seq_list):
            x = layer(x)
            if i == 31:  # index of relu5_2 in the feature stack
                relu5_2 = x

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x_class = self.classifier(x)
        return x_class, relu5_2