Upload 47 files
Browse files
- .gitattributes +3 -0
- CONTRIBUTING.md +29 -0
- LICENSE +202 -0
- README.md +276 -0
- WINDOWS_INSTALLATION.md +89 -0
- cog.yaml +23 -0
- datasets/create_middlebury_tfrecord.py +164 -0
- datasets/create_ucf101_tfrecord.py +138 -0
- datasets/create_vimeo90K_tfrecord.py +167 -0
- datasets/create_xiph_tfrecord.py +146 -0
- datasets/util.py +204 -0
- eval/.DS_Store +0 -0
- eval/config/middlebury.gin +18 -0
- eval/config/ucf101.gin +18 -0
- eval/config/vimeo_90K.gin +18 -0
- eval/config/xiph_2K.gin +18 -0
- eval/config/xiph_4K.gin +18 -0
- eval/eval_cli.py +216 -0
- eval/interpolator.py +209 -0
- eval/interpolator_cli.py +197 -0
- eval/interpolator_test.py +109 -0
- eval/util.py +162 -0
- losses/losses.py +266 -0
- losses/vgg19_loss.py +362 -0
- models/.DS_Store +0 -0
- models/film_net/feature_extractor.py +193 -0
- models/film_net/fusion.py +140 -0
- models/film_net/interpolator.py +207 -0
- models/film_net/options.py +81 -0
- models/film_net/pyramid_flow_estimator.py +163 -0
- models/film_net/util.py +143 -0
- moment.gif +3 -0
- photos/one.png +3 -0
- photos/two.png +3 -0
- predict.py +88 -0
- requirements.txt +14 -0
- training/.DS_Store +0 -0
- training/augmentation_lib.py +220 -0
- training/build_saved_model_cli.py +98 -0
- training/config/film_net-L1.gin +55 -0
- training/config/film_net-Style.gin +66 -0
- training/config/film_net-VGG.gin +64 -0
- training/data_lib.py +296 -0
- training/eval_lib.py +131 -0
- training/metrics_lib.py +142 -0
- training/model_lib.py +53 -0
- training/train.py +131 -0
- training/train_lib.py +343 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+moment.gif filter=lfs diff=lfs merge=lfs -text
+photos/one.png filter=lfs diff=lfs merge=lfs -text
+photos/two.png filter=lfs diff=lfs merge=lfs -text
CONTRIBUTING.md
ADDED
@@ -0,0 +1,29 @@
# How to Contribute

We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.

## Contributor License Agreement

Contributions to this project must be accompanied by a Contributor License
Agreement (CLA). You (or your employer) retain the copyright to your
contribution; this simply gives us permission to use and redistribute your
contributions as part of the project. Head over to
<https://cla.developers.google.com/> to see your current agreements on file or
to sign a new one.

You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.

## Code Reviews

All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.

## Community Guidelines

This project follows
[Google's Open Source Community Guidelines](https://opensource.google/conduct/).
LICENSE
ADDED
@@ -0,0 +1,202 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
README.md
ADDED
@@ -0,0 +1,276 @@
# FILM: Frame Interpolation for Large Motion

### [Website](https://film-net.github.io/) | [Paper](https://arxiv.org/pdf/2202.04901.pdf) | [Google AI Blog](https://ai.googleblog.com/2022/10/large-motion-frame-interpolation.html) | [Tensorflow Hub Colab](https://www.tensorflow.org/hub/tutorials/tf_hub_film_example) | [YouTube](https://www.youtube.com/watch?v=OAD-BieIjH4) <br>

The official Tensorflow 2 implementation of our high quality frame interpolation neural network. We present a unified single-network approach that doesn't use additional pre-trained networks, like optical flow or depth, and yet achieves state-of-the-art results. We use a multi-scale feature extractor that shares the same convolution weights across the scales. Our model is trainable from frame triplets alone. <br>

[FILM: Frame Interpolation for Large Motion](https://arxiv.org/abs/2202.04901) <br />
[Fitsum Reda](https://fitsumreda.github.io/)<sup>1</sup>, [Janne Kontkanen](https://scholar.google.com/citations?user=MnXc4JQAAAAJ&hl=en)<sup>1</sup>, [Eric Tabellion](http://www.tabellion.org/et/)<sup>1</sup>, [Deqing Sun](https://deqings.github.io/)<sup>1</sup>, [Caroline Pantofaru](https://scholar.google.com/citations?user=vKAKE1gAAAAJ&hl=en)<sup>1</sup>, [Brian Curless](https://homes.cs.washington.edu/~curless/)<sup>1,2</sup><br />
<sup>1</sup>Google Research, <sup>2</sup>University of Washington<br />
In ECCV 2022.

![A sample 2-second moment.](https://github.com/googlestaging/frame-interpolation/blob/main/moment.gif)
FILM transforms near-duplicate photos into slow motion footage that looks like it was shot with a video camera.

## Web Demo

Integrated into [Hugging Face Spaces 🤗](https://huggingface.co/spaces) using [Gradio](https://github.com/gradio-app/gradio). Try out the Web Demo: [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/johngoad/frame-interpolation)

Try the interpolation model with the Replicate web demo at
[![Replicate](https://replicate.com/google-research/frame-interpolation/badge)](https://replicate.com/google-research/frame-interpolation)

Try FILM to interpolate between two or more images with PyTTI-Tools at [![PyTTI-Tools:FILM](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.sandbox.google.com/github/pytti-tools/frame-interpolation/blob/main/PyTTI_Tools_FiLM-colab.ipynb#scrollTo=-7TD7YZJbsy_)

An alternative Colab for running FILM on arbitrarily many input images, not just two, is available at [![FILM-Gdrive](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1NuaPPSvUhYafymUf2mEkvhnEtpD5oihs)

## Change Log
* **Nov 28, 2022**: Upgrade `eval.interpolator_cli` for **high resolution frame interpolation**. `--block_height` and `--block_width` determine the total number of patches (`block_height*block_width`) into which the input images are subdivided. By default, both arguments are set to 1, so no subdivision is done.
* **Mar 12, 2022**: Support for Windows, see [WINDOWS_INSTALLATION.md](https://github.com/google-research/frame-interpolation/blob/main/WINDOWS_INSTALLATION.md).
* **Mar 09, 2022**: Support for **high resolution frame interpolation**. Set `--block_height` and `--block_width` in `eval.interpolator_test` to extract patches from the inputs, and reconstruct the interpolated frame from the iteratively interpolated patches.
## Installation

* Get the Frame Interpolation source code:

```
git clone https://github.com/google-research/frame-interpolation
cd frame-interpolation
```

* Optionally, pull the recommended Docker base image:

```
docker pull gcr.io/deeplearning-platform-release/tf2-gpu.2-6:latest
```

* If you do not use Docker, set up your NVIDIA GPU environment with:
  * [Anaconda Python 3.9](https://www.anaconda.com/products/individual)
  * [CUDA Toolkit 11.2.1](https://developer.nvidia.com/cuda-11.2.1-download-archive)
  * [cuDNN 8.1.0](https://developer.nvidia.com/rdp/cudnn-download)

* Install frame interpolation dependencies:

```
pip3 install -r requirements.txt
sudo apt-get install -y ffmpeg
```

### See [WINDOWS_INSTALLATION](https://github.com/google-research/frame-interpolation/blob/main/WINDOWS_INSTALLATION.md) for Windows Support

## Pre-trained Models

* Create a directory where you can keep large files. Ideally, not in this
  directory.

```
mkdir -p <pretrained_models>
```

* Download the pre-trained TF2 Saved Models from
  [google drive](https://drive.google.com/drive/folders/1q8110-qp225asX3DQvZnfLfJPkCHmDpy?usp=sharing)
  and put them into `<pretrained_models>`.

The downloaded folder should have the following structure:

```
<pretrained_models>/
├── film_net/
│   ├── L1/
│   ├── Style/
│   ├── VGG/
├── vgg/
│   ├── imagenet-vgg-verydeep-19.mat
```
|
86 |
+
|
87 |
+
The following instructions run the interpolator on the photos provided in
|
88 |
+
'frame-interpolation/photos'.
|
89 |
+
|
90 |
+
### One mid-frame interpolation
|
91 |
+
|
92 |
+
To generate an intermediate photo from the input near-duplicate photos, simply run:
|
93 |
+
|
94 |
+
```
|
95 |
+
python3 -m eval.interpolator_test \
|
96 |
+
--frame1 photos/one.png \
|
97 |
+
--frame2 photos/two.png \
|
98 |
+
--model_path <pretrained_models>/film_net/Style/saved_model \
|
99 |
+
--output_frame photos/output_middle.png
|
100 |
+
```
|
101 |
+
|
102 |
+
This will produce the sub-frame at `t=0.5` and save as 'photos/output_middle.png'.
|
103 |
+
|
104 |
+
### Many in-between frames interpolation
|
105 |
+
|
106 |
+
It takes in a set of directories identified by a glob (--pattern). Each directory
|
107 |
+
is expected to contain at least two input frames, with each contiguous frame
|
108 |
+
pair treated as an input to generate in-between frames. Frames should be named such that when sorted (naturally) with `natsort`, their desired order is unchanged.
|
109 |
+
|
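As a minimal illustration of that naming requirement (assuming the `natsort` package, which the CLI uses for sorting, is installed; the filenames below are made-up examples):

```
# Natural sorting keeps frame2 before frame10; plain lexicographic sorting does not.
from natsort import natsorted

frames = ["frame10.png", "frame2.png", "frame1.png"]
print(sorted(frames))     # ['frame1.png', 'frame10.png', 'frame2.png']  (lexicographic)
print(natsorted(frames))  # ['frame1.png', 'frame2.png', 'frame10.png']  (natural order)
```

With the frame directories prepared this way, run: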
```
python3 -m eval.interpolator_cli \
   --pattern "photos" \
   --model_path <pretrained_models>/film_net/Style/saved_model \
   --times_to_interpolate 6 \
   --output_video
```

You will find the interpolated frames (including the input frames) in
'photos/interpolated_frames/', and the interpolated video at
'photos/interpolated.mp4'.

The number of frames is determined by `--times_to_interpolate`, which controls
the number of times the frame interpolator is invoked. When the number of frames
in a directory is `num_frames`, the number of output frames will be
`(2^times_to_interpolate+1)*(num_frames-1)`.
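As a quick sanity check on that formula, here is an illustrative Python snippet (not part of the repository) that computes the expected output count:

```
# Illustrative only: expected number of output frames written by the CLI,
# following the formula quoted above.
def expected_output_frames(times_to_interpolate: int, num_frames: int) -> int:
  return (2**times_to_interpolate + 1) * (num_frames - 1)

# Two input frames with --times_to_interpolate 6, as in the command above:
print(expected_output_frames(6, 2))  # 65
```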
## Datasets

We use [Vimeo-90K](http://data.csail.mit.edu/tofu/dataset/vimeo_triplet.zip) as
our main training dataset. For quantitative evaluations, we rely on commonly
used benchmark datasets, specifically:

* [Vimeo-90K](http://data.csail.mit.edu/tofu/testset/vimeo_interp_test.zip)
* [Middlebury-Other](https://vision.middlebury.edu/flow/data)
* [UCF101](https://people.cs.umass.edu/~hzjiang/projects/superslomo/UCF101_results.zip)
* [Xiph](https://github.com/sniklaus/softmax-splatting/blob/master/benchmark.py)

### Creating a TFRecord

The training and benchmark evaluation scripts expect the frame triplets in the
[TFRecord](https://www.tensorflow.org/tutorials/load_data/tfrecord) storage format. <br />

We have included scripts that encode the relevant frame triplets into a
[tf.train.Example](https://www.tensorflow.org/api_docs/python/tf/train/Example)
data format, and export to a TFRecord file. <br />

You can run `python3 -m datasets.create_<dataset_name>_tfrecord --help` for more information.

For example, run the command below to create a TFRecord for the Middlebury-Other
dataset. Download the [images](https://vision.middlebury.edu/flow/data) and point `--input_dir` to the unzipped folder path.

```
python3 -m datasets.create_middlebury_tfrecord \
  --input_dir=<root folder of middlebury-other> \
  --output_tfrecord_filepath=<output tfrecord filepath> \
  --num_shards=3
```

The above command will output a TFRecord file with 3 shards, as `<output tfrecord filepath>@3`.
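To spot-check the generated file, a record can be parsed back with the feature keys documented in the `datasets/create_*_tfrecord.py` scripts. The snippet below is an illustrative sketch rather than repository code; the shard filename is a hypothetical example of the `@3` sharded output:

```
# Decode one frame triplet from a generated TFRecord shard (illustrative).
import tensorflow as tf

_FEATURE_MAP = {
    f'{key}/encoded': tf.io.FixedLenFeature((), tf.string, default_value='')
    for key in ('frame_0', 'frame_1', 'frame_2')
}

dataset = tf.data.TFRecordDataset('middlebury.tfrecord-00000-of-00003')  # hypothetical shard name
for record in dataset.take(1):
  example = tf.io.parse_single_example(record, _FEATURE_MAP)
  triplet = [tf.io.decode_image(example[f'{key}/encoded'], channels=3)
             for key in ('frame_0', 'frame_1', 'frame_2')]
  print([image.shape for image in triplet])
```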
## Training

Below are our training gin configuration files for the different loss functions:

```
training/
├── config/
│   ├── film_net-L1.gin
│   ├── film_net-VGG.gin
│   ├── film_net-Style.gin
```

To launch a training run, simply pass the configuration filepath of the desired
experiment. <br />
By default, it uses all visible GPUs for training. To debug or train
on a CPU, append `--mode cpu`.

```
python3 -m training.train \
   --gin_config training/config/<config filename>.gin \
   --base_folder <base folder for all training runs> \
   --label <descriptive label for the run>
```

* When training finishes, the folder structure will look like this:

```
<base_folder>/
├── <label>/
│   ├── config.gin
│   ├── eval/
│   ├── train/
│   ├── saved_model/
```

### Build a SavedModel

Optionally, to build a
[SavedModel](https://www.tensorflow.org/guide/saved_model) format from a trained
checkpoints folder, you can use this command:

```
python3 -m training.build_saved_model_cli \
   --base_folder <base folder of training sessions> \
   --label <the name of the run>
```

* By default, a SavedModel is created when the training loop ends, and it will be saved at
  `<base_folder>/<label>/saved_model`.

## Evaluation on Benchmarks

Below are the evaluation gin configuration files for the benchmarks we
have considered:

```
eval/
├── config/
│   ├── middlebury.gin
│   ├── ucf101.gin
│   ├── vimeo_90K.gin
│   ├── xiph_2K.gin
│   ├── xiph_4K.gin
```

To run an evaluation, simply pass the configuration file of the desired evaluation dataset. <br />
If a GPU is visible, it runs on it.

```
python3 -m eval.eval_cli \
   --gin_config eval/config/<eval_dataset>.gin \
   --model_path <pretrained_models>/film_net/L1/saved_model
```

The above command will produce the PSNR and SSIM scores presented in the paper.
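For reference, the two metrics correspond to stock TensorFlow image ops; the sketch below only illustrates the metric definitions on a single image pair and is not the exact code used by `eval.eval_cli`:

```
# Illustrative sketch of the benchmark metrics on one prediction/ground-truth pair.
import tensorflow as tf

def psnr_and_ssim(prediction: tf.Tensor, ground_truth: tf.Tensor):
  """Expects float32 images of shape (H, W, 3) scaled to [0, 1]."""
  psnr = tf.image.psnr(prediction, ground_truth, max_val=1.0)
  ssim = tf.image.ssim(prediction, ground_truth, max_val=1.0)
  return psnr, ssim
```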
## Citation

If you find this implementation useful in your works, please acknowledge it
appropriately by citing:

```
@inproceedings{reda2022film,
  title = {FILM: Frame Interpolation for Large Motion},
  author = {Fitsum Reda and Janne Kontkanen and Eric Tabellion and Deqing Sun and Caroline Pantofaru and Brian Curless},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year = {2022}
}
```

```
@misc{film-tf,
  title = {Tensorflow 2 Implementation of "FILM: Frame Interpolation for Large Motion"},
  author = {Fitsum Reda and Janne Kontkanen and Eric Tabellion and Deqing Sun and Caroline Pantofaru and Brian Curless},
  year = {2022},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/google-research/frame-interpolation}}
}
```

## Acknowledgments

We would like to thank Richard Tucker, Jason Lai and David Minnen. We would also
like to thank Jamie Aspinall for the imagery included in this repository.

## Coding style

* 2 spaces for indentation
* 80 character line length
* PEP8 formatting

## Disclaimer

This is not an officially supported Google product.
WINDOWS_INSTALLATION.md
ADDED
@@ -0,0 +1,89 @@
# [FILM](https://github.com/google-research/frame-interpolation): Windows Installation Instructions

## Anaconda Python 3.9 (Optional)

#### Install Anaconda3 Python3.9
* Go to [https://www.anaconda.com/products/individual](https://www.anaconda.com/products/individual) and click the "Download" button.
* Download the Windows [64-Bit](https://repo.anaconda.com/archive/Anaconda3-2021.11-Windows-x86_64.exe) or [32-bit](https://repo.anaconda.com/archive/Anaconda3-2021.11-Windows-x86.exe) Graphical Installer, depending on your system needs.
* Run the downloaded (`.exe`) file to begin the installation.
* (Optional) Check "Add Anaconda3 to my PATH environment variable". You may get a red-text warning of its implications; you may ignore it for this setup.

#### Create a new Anaconda virtual environment
* Open a new terminal.
* Type the following command:
```
conda create -n frame_interpolation pip python=3.9
```
* The above command will create a new virtual environment with the name `frame_interpolation`.

#### Activate the Anaconda virtual environment
* Activate the newly created virtual environment by typing in your terminal (Command Prompt or PowerShell):
```
conda activate frame_interpolation
```
* Once activated, your terminal should look like:
```
(frame_interpolation) <present working directory> >
```

## NVIDIA GPU Support
#### Install CUDA Toolkit
* Go to [https://developer.nvidia.com/cuda-11.2.1-download-archive](https://developer.nvidia.com/cuda-11.2.1-download-archive) and select `Windows`.
* Download and install `CUDA Toolkit 11.2.1`.
* Additional CUDA installation information is available [here](https://docs.nvidia.com/cuda/archive/11.2.2/cuda-installation-guide-microsoft-windows/index.html).

#### Install cuDNN
* Go to [https://developer.nvidia.com/rdp/cudnn-download](https://developer.nvidia.com/rdp/cudnn-download).
* Create a user profile (if needed) and log in.
* Select `cuDNN v8.1.0 (January 26th, 2021), for CUDA 11.0, 11.1 and 11.2`.
* Download [cuDNN Library for Windows (x86)](https://developer.nvidia.com/compute/machine-learning/cudnn/secure/8.1.0.77/11.2_20210127/cudnn-11.2-windows-x64-v8.1.0.77.zip).
* Extract the contents of the zipped folder (it contains a folder named `cuda`) into `<INSTALL_PATH>\NVIDIA GPU Computing Toolkit\CUDA\v11.2\`. `<INSTALL_PATH>` points to the installation directory specified during CUDA Toolkit installation. By default, `<INSTALL_PATH> = C:\Program Files`.

#### Environment Setup
* Add the following paths via 'Advanced System Settings' > 'Environment Variables ...' > Edit 'Path':
  * <INSTALL_PATH>\NVIDIA GPU Computing Toolkit\CUDA\v11.2\bin
  * <INSTALL_PATH>\NVIDIA GPU Computing Toolkit\CUDA\v11.2\libnvvp
  * <INSTALL_PATH>\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include
  * <INSTALL_PATH>\NVIDIA GPU Computing Toolkit\CUDA\v11.2\extras\CUPTI\lib64
  * <INSTALL_PATH>\NVIDIA GPU Computing Toolkit\CUDA\v11.2\cuda\bin

#### Verify Installation
* Open a **new** terminal and type `conda activate frame_interpolation`.
* Install (temporarily) tensorflow and run a simple operation by typing:
```
pip install --ignore-installed --upgrade tensorflow==2.6.0
python -c "import tensorflow as tf;print(tf.reduce_sum(tf.random.normal([1000, 1000])))"
```
* You should see success messages, e.g. 'Created device /job:localhost/replica:0/task:0/device:GPU:0'.

## FILM Installation
* Get the Frame Interpolation source code:
```
git clone https://github.com/google-research/frame-interpolation
cd frame-interpolation
```
* Install dependencies:
```
pip install -r requirements.txt
conda install -c conda-forge ffmpeg
```
* Download the pre-trained models, as detailed [here](https://github.com/google-research/frame-interpolation#pre-trained-models).

## Running the Codes
* One mid-frame interpolation. Note: `python3` may not be recognized on Windows, so simply drop the `3` as below.
```
python -m eval.interpolator_test --frame1 photos\one.png --frame2 photos\two.png --model_path <pretrained_models>\film_net\Style\saved_model --output_frame photos\output_middle.png
```

* Large resolution mid-frame interpolation: Set `--block_height` and `--block_width` to subdivide along the height and width and create patches. The interpolator is run iteratively on the patches, and the resulting interpolated mid-patches are reconstructed into a final mid-frame. The example below will create and run on 4 patches (2*2).
```
python -m eval.interpolator_test --frame1 photos\one.png --frame2 photos\two.png --block_height 2 --block_width 2 --model_path <pretrained_models>\film_net\Style\saved_model --output_frame photos\output_middle.png
```
* Many in-between frames interpolation:
```
python -m eval.interpolator_cli --pattern "photos" --model_path <pretrained_models>\film_net\Style\saved_model --times_to_interpolate 6 --output_video
```

## Acknowledgments

This Windows installation guide is heavily based on the [tensorflow-object-detection-api-tutorial](https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/install.html).
cog.yaml
ADDED
@@ -0,0 +1,23 @@
build:
  gpu: true
  cuda: "11.2"
  python_version: "3.8"
  system_packages:
    - "libgl1-mesa-glx"
    - "libglib2.0-0"
  python_packages:
    - "ipython==7.30.1"
    - "tensorflow-gpu==2.8.0"
    - "tensorflow-datasets==4.4.0"
    - "tensorflow-addons==0.15.0"
    - "absl-py==0.12.0"
    - "gin-config==0.5.0"
    - "parameterized==0.8.1"
    - "mediapy==1.0.3"
    - "scikit-image==0.19.1"
    - "apache-beam==2.34.0"
  run:
    - apt-get update && apt-get install -y software-properties-common
    - apt-get install ffmpeg -y

predict: "predict.py:Predictor"
datasets/create_middlebury_tfrecord.py
ADDED
@@ -0,0 +1,164 @@
# Copyright 2022 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Beam pipeline that generates Middlebury `Other Datasets` triplet TFRecords.

The Middlebury interpolation evaluation dataset consists of two subsets.

(1) Two frames only, without the intermediate golden frame. A total of 12 such
  pairs, with folder names (Army, Backyard, Basketball, Dumptruck,
  Evergreen, Grove, Mequon, Schefflera, Teddy, Urban, Wooden, Yosemite)

(2) Two frames together with the intermediate golden frame. A total of 12 such
  triplets, with folder names (Beanbags, Dimetrodon, DogDance, Grove2,
  Grove3, Hydrangea, MiniCooper, RubberWhale, Urban2, Urban3, Venus, Walking)

This script runs on (2), i.e. the dataset with the golden frames. For more
information, visit https://vision.middlebury.edu/flow/data.

Input to the script is the root-folder that contains the unzipped folders
of input pairs (other-data) and golden frames (other-gt-interp).

Output TFRecord is a tf.train.Example proto of each image triplet.
The feature_map takes the form:
  feature_map {
      'frame_0/encoded':
          tf.io.FixedLenFeature((), tf.string, default_value=''),
      'frame_0/format':
          tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
      'frame_0/height':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'frame_0/width':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'frame_1/encoded':
          tf.io.FixedLenFeature((), tf.string, default_value=''),
      'frame_1/format':
          tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
      'frame_1/height':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'frame_1/width':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'frame_2/encoded':
          tf.io.FixedLenFeature((), tf.string, default_value=''),
      'frame_2/format':
          tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
      'frame_2/height':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'frame_2/width':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'path':
          tf.io.FixedLenFeature((), tf.string, default_value=''),
  }

Usage example:
  python3 -m frame_interpolation.datasets.create_middlebury_tfrecord \
    --input_dir=<root folder of middlebury-other> \
    --output_tfrecord_filepath=<output tfrecord filepath>
"""

import os

from . import util
from absl import app
from absl import flags
from absl import logging
import apache_beam as beam
import tensorflow as tf

_INPUT_DIR = flags.DEFINE_string(
    'input_dir',
    default='/root/path/to/middlebury-other',
    help='Path to the root directory of the `Other Datasets` of the Middlebury '
    'interpolation evaluation data. '
    'We expect the data to have been downloaded and unzipped. \n'
    'Folder structures:\n'
    '| raw_middlebury_other_dataset/\n'
    '|  other-data/\n'
    '|  | Beanbags\n'
    '|  | | frame10.png\n'
    '|  | | frame11.png\n'
    '|  | Dimetrodon\n'
    '|  | | frame10.png\n'
    '|  | | frame11.png\n'
    '|  | ...\n'
    '|  other-gt-interp/\n'
    '|  | Beanbags\n'
    '|  | | frame10i11.png\n'
    '|  | Dimetrodon\n'
    '|  | | frame10i11.png\n'
    '|  | ...\n')

_INPUT_PAIRS_FOLDERNAME = flags.DEFINE_string(
    'input_pairs_foldername',
    default='other-data',
    help='Foldername containing the folders of the input frame pairs.')

_GOLDEN_FOLDERNAME = flags.DEFINE_string(
    'golden_foldername',
    default='other-gt-interp',
    help='Foldername containing the folders of the golden frame.')

_OUTPUT_TFRECORD_FILEPATH = flags.DEFINE_string(
    'output_tfrecord_filepath',
    default=None,
    required=True,
    help='Filepath to the output TFRecord file.')

_NUM_SHARDS = flags.DEFINE_integer('num_shards',
                                   default=3,
                                   help='Number of shards used for the output.')

# Image key -> basename for frame interpolator: start / middle / end frames.
_INTERPOLATOR_IMAGES_MAP = {
    'frame_0': 'frame10.png',
    'frame_1': 'frame10i11.png',
    'frame_2': 'frame11.png',
}


def main(unused_argv):
  """Creates and runs a Beam pipeline to write frame triplets as a TFRecord."""
  # Collect the list of folder paths containing the input and golden frames.
  pairs_list = tf.io.gfile.listdir(
      os.path.join(_INPUT_DIR.value, _INPUT_PAIRS_FOLDERNAME.value))

  folder_names = [
      _INPUT_PAIRS_FOLDERNAME.value, _GOLDEN_FOLDERNAME.value,
      _INPUT_PAIRS_FOLDERNAME.value
  ]
  triplet_dicts = []
  for pair in pairs_list:
    triplet_dict = {
        image_key: os.path.join(_INPUT_DIR.value, folder, pair, image_basename)
        for folder, (image_key, image_basename
                    ) in zip(folder_names, _INTERPOLATOR_IMAGES_MAP.items())
    }
    triplet_dicts.append(triplet_dict)

  p = beam.Pipeline('DirectRunner')
  (p | 'ReadInputTripletDicts' >> beam.Create(triplet_dicts)  # pylint: disable=expression-not-assigned
   | 'GenerateSingleExample' >> beam.ParDo(
       util.ExampleGenerator(_INTERPOLATOR_IMAGES_MAP))
   | 'WriteToTFRecord' >> beam.io.tfrecordio.WriteToTFRecord(
       file_path_prefix=_OUTPUT_TFRECORD_FILEPATH.value,
       num_shards=_NUM_SHARDS.value,
       coder=beam.coders.BytesCoder()))
  result = p.run()
  result.wait_until_finish()

  logging.info('Succeeded in creating the output TFRecord file: \'%s@%s\'.',
               _OUTPUT_TFRECORD_FILEPATH.value, str(_NUM_SHARDS.value))


if __name__ == '__main__':
  app.run(main)
datasets/create_ucf101_tfrecord.py
ADDED
@@ -0,0 +1,138 @@
# Copyright 2022 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Beam pipeline that generates UCF101 `interp_test` triplet TFRecords.

UCF101 interpolation evaluation dataset consists of 379 triplets, with the
middle frame being the golden intermediate. The dataset is available here:
https://people.cs.umass.edu/~hzjiang/projects/superslomo/UCF101_results.zip.

Input to the script is the root folder that contains the unzipped
`UCF101_results` folder.

Output TFRecord is a tf.train.Example proto of each image triplet.
The feature_map takes the form:
  feature_map {
      'frame_0/encoded':
          tf.io.FixedLenFeature((), tf.string, default_value=''),
      'frame_0/format':
          tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
      'frame_0/height':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'frame_0/width':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'frame_1/encoded':
          tf.io.FixedLenFeature((), tf.string, default_value=''),
      'frame_1/format':
          tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
      'frame_1/height':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'frame_1/width':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'frame_2/encoded':
          tf.io.FixedLenFeature((), tf.string, default_value=''),
      'frame_2/format':
          tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
      'frame_2/height':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'frame_2/width':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'path':
          tf.io.FixedLenFeature((), tf.string, default_value=''),
  }

Usage example:
  python3 -m frame_interpolation.datasets.create_ucf101_tfrecord \
    --input_dir=<root folder of UCF101_results> \
    --output_tfrecord_filepath=<output tfrecord filepath>
"""

import os

from . import util
from absl import app
from absl import flags
from absl import logging
import apache_beam as beam
import tensorflow as tf

_INPUT_DIR = flags.DEFINE_string(
    'input_dir',
    default='/root/path/to/UCF101_results/ucf101_interp_ours',
    help='Path to the root directory of the `UCF101_results` of the UCF101 '
    'interpolation evaluation data. '
    'We expect the data to have been downloaded and unzipped. \n'
    'Folder structures:\n'
    '| raw_UCF101_results/\n'
    '|  ucf101_interp_ours/\n'
    '|  | 1/\n'
    '|  | | frame_00.png\n'
    '|  | | frame_01_gt.png\n'
    '|  | | frame_01_ours.png\n'
    '|  | | frame_02.png\n'
    '|  | 2/\n'
    '|  | | frame_00.png\n'
    '|  | | frame_01_gt.png\n'
    '|  | | frame_01_ours.png\n'
    '|  | | frame_02.png\n'
    '|  | ...\n'
    '|  ucf101_sepconv/\n'
    '|  ...\n')

_OUTPUT_TFRECORD_FILEPATH = flags.DEFINE_string(
    'output_tfrecord_filepath',
    default=None,
    required=True,
    help='Filepath to the output TFRecord file.')

_NUM_SHARDS = flags.DEFINE_integer('num_shards',
                                   default=2,
                                   help='Number of shards used for the output.')

# Image key -> basename for frame interpolator: start / middle / end frames.
_INTERPOLATOR_IMAGES_MAP = {
    'frame_0': 'frame_00.png',
    'frame_1': 'frame_01_gt.png',
    'frame_2': 'frame_02.png',
}


def main(unused_argv):
  """Creates and runs a Beam pipeline to write frame triplets as a TFRecord."""
  # Collect the list of folder paths containing the input and golden frames.
  triplets_list = tf.io.gfile.listdir(_INPUT_DIR.value)

  triplet_dicts = []
  for triplet in triplets_list:
    triplet_dicts.append({
        image_key: os.path.join(_INPUT_DIR.value, triplet, image_basename)
        for image_key, image_basename in _INTERPOLATOR_IMAGES_MAP.items()
    })

  p = beam.Pipeline('DirectRunner')
  (p | 'ReadInputTripletDicts' >> beam.Create(triplet_dicts)  # pylint: disable=expression-not-assigned
   | 'GenerateSingleExample' >> beam.ParDo(
       util.ExampleGenerator(_INTERPOLATOR_IMAGES_MAP))
   | 'WriteToTFRecord' >> beam.io.tfrecordio.WriteToTFRecord(
       file_path_prefix=_OUTPUT_TFRECORD_FILEPATH.value,
       num_shards=_NUM_SHARDS.value,
       coder=beam.coders.BytesCoder()))
  result = p.run()
  result.wait_until_finish()

  logging.info('Succeeded in creating the output TFRecord file: \'%s@%s\'.',
               _OUTPUT_TFRECORD_FILEPATH.value, str(_NUM_SHARDS.value))


if __name__ == '__main__':
  app.run(main)
datasets/create_vimeo90K_tfrecord.py
ADDED
@@ -0,0 +1,167 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
r"""Beam pipeline that generates Vimeo-90K (train or test) triplet TFRecords.
|
16 |
+
|
17 |
+
Vimeo-90K dataset is built upon 5,846 videos downloaded from vimeo.com. The list
|
18 |
+
of the original video links are available here:
|
19 |
+
https://github.com/anchen1011/toflow/blob/master/data/original_vimeo_links.txt.
|
20 |
+
Each video is further cropped into a fixed spatial size of (448 x 256) to create
|
21 |
+
89,000 video clips.
|
22 |
+
|
23 |
+
The Vimeo-90K dataset is designed for four video processing tasks. This script
|
24 |
+
creates the TFRecords of frame triplets for frame interpolation task.
|
25 |
+
|
26 |
+
Temporal frame interpolation triplet dataset:
|
27 |
+
- 73,171 triplets of size (448x256) extracted from 15K subsets of Vimeo-90K.
|
28 |
+
- The triplets are pre-split into (train,test) = (51313,3782)
|
29 |
+
- Download links:
|
30 |
+
Test-set: http://data.csail.mit.edu/tofu/testset/vimeo_interp_test.zip
|
31 |
+
Train+test-set: http://data.csail.mit.edu/tofu/dataset/vimeo_triplet.zip
|
32 |
+
|
33 |
+
For more information, see the arXiv paper, project page or the GitHub link.
|
34 |
+
@article{xue17toflow,
|
35 |
+
author = {Xue, Tianfan and
|
36 |
+
Chen, Baian and
|
37 |
+
Wu, Jiajun and
|
38 |
+
Wei, Donglai and
|
39 |
+
Freeman, William T},
|
40 |
+
title = {Video Enhancement with Task-Oriented Flow},
|
41 |
+
journal = {arXiv},
|
42 |
+
year = {2017}
|
43 |
+
}
|
44 |
+
Project: http://toflow.csail.mit.edu/
|
45 |
+
GitHub: https://github.com/anchen1011/toflow
|
46 |
+
|
47 |
+
Inputs to the script are (1) the directory to the downloaded and unzipped folder
|
48 |
+
(2) the filepath of the text-file that lists the subfolders of the triplets.
|
49 |
+
|
50 |
+
Output TFRecord is a tf.train.Example proto of each image triplet.
|
51 |
+
The feature_map takes the form:
|
52 |
+
feature_map {
|
53 |
+
'frame_0/encoded':
|
54 |
+
tf.io.FixedLenFeature((), tf.string, default_value=''),
|
55 |
+
'frame_0/format':
|
56 |
+
tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
|
57 |
+
'frame_0/height':
|
58 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
59 |
+
'frame_0/width':
|
60 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
61 |
+
'frame_1/encoded':
|
62 |
+
tf.io.FixedLenFeature((), tf.string, default_value=''),
|
63 |
+
'frame_1/format':
|
64 |
+
tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
|
65 |
+
'frame_1/height':
|
66 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
67 |
+
'frame_1/width':
|
68 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
69 |
+
'frame_2/encoded':
|
70 |
+
tf.io.FixedLenFeature((), tf.string, default_value=''),
|
71 |
+
'frame_2/format':
|
72 |
+
tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
|
73 |
+
'frame_2/height':
|
74 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
75 |
+
'frame_2/width':
|
76 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0)
|
77 |
+
'path':
|
78 |
+
tf.io.FixedLenFeature((), tf.string, default_value='')
|
79 |
+
}
|
80 |
+
|
81 |
+
Usage example:
|
82 |
+
python3 -m frame_interpolation.datasets.create_vimeo90K_tfrecord \
|
83 |
+
--input_dir=<root folder of vimeo90K dataset> \
|
84 |
+
--input_triplet_list_filepath=<filepath of tri_{test|train}list.txt> \
|
85 |
+
--output_tfrecord_filepath=<output tfrecord filepath>
|
86 |
+
"""
|
87 |
+
import os
|
88 |
+
|
89 |
+
from . import util
|
90 |
+
from absl import app
|
91 |
+
from absl import flags
|
92 |
+
from absl import logging
|
93 |
+
import apache_beam as beam
|
94 |
+
import numpy as np
|
95 |
+
import tensorflow as tf
|
96 |
+
|
97 |
+
|
98 |
+
_INPUT_DIR = flags.DEFINE_string(
|
99 |
+
'input_dir',
|
100 |
+
default='/path/to/raw_vimeo_interp/sequences',
|
101 |
+
help='Path to the root directory of the vimeo frame interpolation dataset. '
|
102 |
+
'We expect the data to have been downloaded and unzipped.\n'
|
103 |
+
'Folder structures:\n'
|
104 |
+
'| raw_vimeo_dataset/\n'
|
105 |
+
'| sequences/\n'
|
106 |
+
'| | 00001\n'
|
107 |
+
'| | | 0389/\n'
|
108 |
+
'| | | | im1.png\n'
|
109 |
+
'| | | | im2.png\n'
|
110 |
+
'| | | | im3.png\n'
|
111 |
+
'| | | ...\n'
|
112 |
+
'| | 00002/\n'
|
113 |
+
'| | ...\n'
|
114 |
+
'| readme.txt\n'
|
115 |
+
'| tri_trainlist.txt\n'
|
116 |
+
'| tri_testlist.txt \n')
|
117 |
+
|
118 |
+
_INPUT_TRIPLET_LIST_FILEPATH = flags.DEFINE_string(
|
119 |
+
'input_triplet_list_filepath',
|
120 |
+
default='/path/to/raw_vimeo_dataset/tri_{test|train}list.txt',
|
121 |
+
help='Text file containing a list of sub-directories of input triplets.')
|
122 |
+
|
123 |
+
_OUTPUT_TFRECORD_FILEPATH = flags.DEFINE_string(
|
124 |
+
'output_tfrecord_filepath',
|
125 |
+
default=None,
|
126 |
+
help='Filepath to the output TFRecord file.')
|
127 |
+
|
128 |
+
_NUM_SHARDS = flags.DEFINE_integer('num_shards',
|
129 |
+
default=200, # set to 3 for vimeo_test, and 200 for vimeo_train.
|
130 |
+
help='Number of shards used for the output.')
|
131 |
+
|
132 |
+
# Image key -> basename for frame interpolator: start / middle / end frames.
|
133 |
+
_INTERPOLATOR_IMAGES_MAP = {
|
134 |
+
'frame_0': 'im1.png',
|
135 |
+
'frame_1': 'im2.png',
|
136 |
+
'frame_2': 'im3.png',
|
137 |
+
}
|
138 |
+
|
139 |
+
|
140 |
+
def main(unused_argv):
|
141 |
+
"""Creates and runs a Beam pipeline to write frame triplets as a TFRecord."""
|
142 |
+
with tf.io.gfile.GFile(_INPUT_TRIPLET_LIST_FILEPATH.value, 'r') as fid:
|
143 |
+
triplets_list = np.loadtxt(fid, dtype=str)
|
144 |
+
|
145 |
+
triplet_dicts = []
|
146 |
+
for triplet in triplets_list:
|
147 |
+
triplet_dict = {
|
148 |
+
image_key: os.path.join(_INPUT_DIR.value, triplet, image_basename)
|
149 |
+
for image_key, image_basename in _INTERPOLATOR_IMAGES_MAP.items()
|
150 |
+
}
|
151 |
+
triplet_dicts.append(triplet_dict)
|
152 |
+
p = beam.Pipeline('DirectRunner')
|
153 |
+
(p | 'ReadInputTripletDicts' >> beam.Create(triplet_dicts) # pylint: disable=expression-not-assigned
|
154 |
+
| 'GenerateSingleExample' >> beam.ParDo(
|
155 |
+
util.ExampleGenerator(_INTERPOLATOR_IMAGES_MAP))
|
156 |
+
| 'WriteToTFRecord' >> beam.io.tfrecordio.WriteToTFRecord(
|
157 |
+
file_path_prefix=_OUTPUT_TFRECORD_FILEPATH.value,
|
158 |
+
num_shards=_NUM_SHARDS.value,
|
159 |
+
coder=beam.coders.BytesCoder()))
|
160 |
+
result = p.run()
|
161 |
+
result.wait_until_finish()
|
162 |
+
|
163 |
+
logging.info('Succeeded in creating the output TFRecord file: \'%s@%s\'.',
|
164 |
+
_OUTPUT_TFRECORD_FILEPATH.value, str(_NUM_SHARDS.value))
|
165 |
+
|
166 |
+
if __name__ == '__main__':
|
167 |
+
app.run(main)
|
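The feature_map documented in the module docstring above is enough to read the generated shards back. Below is a minimal parsing sketch, not part of the upload; the shard pattern and the helper name are placeholders, and it assumes the shards were produced by this script.

import tensorflow as tf

# Feature spec mirroring the feature_map in the docstring above.
_FEATURES = {'path': tf.io.FixedLenFeature((), tf.string, default_value='')}
for key in ('frame_0', 'frame_1', 'frame_2'):
  _FEATURES[f'{key}/encoded'] = tf.io.FixedLenFeature((), tf.string, default_value='')
  _FEATURES[f'{key}/format'] = tf.io.FixedLenFeature((), tf.string, default_value='jpg')
  _FEATURES[f'{key}/height'] = tf.io.FixedLenFeature((), tf.int64, default_value=0)
  _FEATURES[f'{key}/width'] = tf.io.FixedLenFeature((), tf.int64, default_value=0)

def parse_triplet(serialized):
  """Decodes one serialized tf.train.Example into three uint8 frames."""
  example = tf.io.parse_single_example(serialized, _FEATURES)
  return tuple(tf.io.decode_image(example[f'frame_{i}/encoded'], channels=3)
               for i in range(3))

# Placeholder shard pattern; match it to --output_tfrecord_filepath.
files = tf.io.gfile.glob('/tmp/vimeo_triplet.tfrecord-*')
dataset = tf.data.TFRecordDataset(files).map(parse_triplet)
for frames in dataset.take(1):
  print([f.shape for f in frames])  # Expect three (256, 448, 3) tensors.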
datasets/create_xiph_tfrecord.py
ADDED
@@ -0,0 +1,146 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
r"""Beam pipeline that generates Xiph triplet TFRecords.
|
16 |
+
|
17 |
+
Xiph is a frame sequence dataset commonly used to assess video compression. See
|
18 |
+
here: https://media.xiph.org/video/derf/
|
19 |
+
|
20 |
+
The SoftSplat paper selected eight 4K clips with the most amount of motion and
|
21 |
+
extracted the first 100 frames from each clip. Each frame is then either resized
|
22 |
+
from 4K to 2K, or a 2K center crop is taken from it, before interpolating
|
23 |
+
the even frames from the odd frames. These datasets are denoted as `Xiph-2K`
|
24 |
+
and `Xiph-4K` respectively. For more information see the project page:
|
25 |
+
https://github.com/sniklaus/softmax-splatting
|
26 |
+
|
27 |
+
Input is the root folder that contains the 800 frames of the eight clips. Set
|
28 |
+
center_crop_factor=2 and scale_factor=1 to generate `Xiph-4K`, and scale_factor=2,
|
29 |
+
center_crop_factor=1 to generate `Xiph-2K`. The script defaults to `Xiph-2K`.
|
30 |
+
|
31 |
+
Output TFRecord is a tf.train.Example proto of each image triplet.
|
32 |
+
The feature_map takes the form:
|
33 |
+
feature_map {
|
34 |
+
'frame_0/encoded':
|
35 |
+
tf.io.FixedLenFeature((), tf.string, default_value=''),
|
36 |
+
'frame_0/format':
|
37 |
+
tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
|
38 |
+
'frame_0/height':
|
39 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
40 |
+
'frame_0/width':
|
41 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
42 |
+
'frame_1/encoded':
|
43 |
+
tf.io.FixedLenFeature((), tf.string, default_value=''),
|
44 |
+
'frame_1/format':
|
45 |
+
tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
|
46 |
+
'frame_1/height':
|
47 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
48 |
+
'frame_1/width':
|
49 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
50 |
+
'frame_2/encoded':
|
51 |
+
tf.io.FixedLenFeature((), tf.string, default_value=''),
|
52 |
+
'frame_2/format':
|
53 |
+
tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
|
54 |
+
'frame_2/height':
|
55 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
56 |
+
'frame_2/width':
|
57 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
58 |
+
'path':
|
59 |
+
tf.io.FixedLenFeature((), tf.string, default_value=''),
|
60 |
+
}
|
61 |
+
|
62 |
+
Usage example:
|
63 |
+
python3 -m frame_interpolation.datasets.create_xiph_tfrecord \
|
64 |
+
--input_dir=<root folder of xiph dataset> \
|
65 |
+
--scale_factor=<scale factor for image resizing, default=2> \
|
66 |
+
--center_crop_factor=<center cropping factor, default=1> \
|
67 |
+
--output_tfrecord_filepath=<output tfrecord filepath>
|
68 |
+
"""
|
69 |
+
import os
|
70 |
+
|
71 |
+
from . import util
|
72 |
+
from absl import app
|
73 |
+
from absl import flags
|
74 |
+
from absl import logging
|
75 |
+
import apache_beam as beam
|
76 |
+
import tensorflow as tf
|
77 |
+
|
78 |
+
_INPUT_DIR = flags.DEFINE_string(
|
79 |
+
'input_dir',
|
80 |
+
default='/root/path/to/selected/xiph/clips',
|
81 |
+
help='Path to the root directory of the `Xiph` interpolation evaluation '
|
82 |
+
'data. We expect the data to have been downloaded and unzipped.')
|
83 |
+
_CENTER_CROP_FACTOR = flags.DEFINE_integer(
|
84 |
+
'center_crop_factor',
|
85 |
+
default=1,
|
86 |
+
help='Factor to center crop the image. If set to 2, a center crop with half '
|
87 |
+
'the height and width of the input is created, without resampling.')
|
88 |
+
_SCALE_FACTOR = flags.DEFINE_integer(
|
89 |
+
'scale_factor',
|
90 |
+
default=2,
|
91 |
+
help='Factor to downsample frames.')
|
92 |
+
_NUM_CLIPS = flags.DEFINE_integer(
|
93 |
+
'num_clips', default=8, help='Number of clips.')
|
94 |
+
_NUM_FRAMES = flags.DEFINE_integer(
|
95 |
+
'num_frames', default=100, help='Number of frames per clip.')
|
96 |
+
_OUTPUT_TFRECORD_FILEPATH = flags.DEFINE_string(
|
97 |
+
'output_tfrecord_filepath',
|
98 |
+
default=None,
|
99 |
+
required=True,
|
100 |
+
help='Filepath to the output TFRecord file.')
|
101 |
+
_NUM_SHARDS = flags.DEFINE_integer('num_shards',
|
102 |
+
default=2,
|
103 |
+
help='Number of shards used for the output.')
|
104 |
+
|
105 |
+
# Image key -> offset for frame interpolator: start / middle / end frame offset.
|
106 |
+
_INTERPOLATOR_IMAGES_MAP = {
|
107 |
+
'frame_0': -1,
|
108 |
+
'frame_1': 0,
|
109 |
+
'frame_2': 1,
|
110 |
+
}
|
111 |
+
|
112 |
+
|
113 |
+
def main(unused_argv):
|
114 |
+
"""Creates and runs a Beam pipeline to write frame triplets as a TFRecord."""
|
115 |
+
# Collect the list of frame filenames.
|
116 |
+
frames_list = sorted(tf.io.gfile.listdir(_INPUT_DIR.value))
|
117 |
+
|
118 |
+
# Collect the triplets, even frames serving as golden to interpolate odds.
|
119 |
+
triplets_dict = []
|
120 |
+
for clip_index in range(_NUM_CLIPS.value):
|
121 |
+
for frame_index in range(1, _NUM_FRAMES.value - 1, 2):
|
122 |
+
index = clip_index * _NUM_FRAMES.value + frame_index
|
123 |
+
triplet_dict = {
|
124 |
+
image_key: os.path.join(_INPUT_DIR.value,
|
125 |
+
frames_list[index + image_offset])
|
126 |
+
for image_key, image_offset in _INTERPOLATOR_IMAGES_MAP.items()
|
127 |
+
}
|
128 |
+
triplets_dict.append(triplet_dict)
|
129 |
+
|
130 |
+
p = beam.Pipeline('DirectRunner')
|
131 |
+
(p | 'ReadInputTripletDicts' >> beam.Create(triplets_dict) # pylint: disable=expression-not-assigned
|
132 |
+
| 'GenerateSingleExample' >> beam.ParDo(
|
133 |
+
util.ExampleGenerator(_INTERPOLATOR_IMAGES_MAP, _SCALE_FACTOR.value,
|
134 |
+
_CENTER_CROP_FACTOR.value))
|
135 |
+
| 'WriteToTFRecord' >> beam.io.tfrecordio.WriteToTFRecord(
|
136 |
+
file_path_prefix=_OUTPUT_TFRECORD_FILEPATH.value,
|
137 |
+
num_shards=_NUM_SHARDS.value,
|
138 |
+
coder=beam.coders.BytesCoder()))
|
139 |
+
result = p.run()
|
140 |
+
result.wait_until_finish()
|
141 |
+
|
142 |
+
logging.info('Succeeded in creating the output TFRecord file: \'%s@%s\'.',
|
143 |
+
_OUTPUT_TFRECORD_FILEPATH.value, str(_NUM_SHARDS.value))
|
144 |
+
|
145 |
+
if __name__ == '__main__':
|
146 |
+
app.run(main)
|
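A note on the triplet construction in main() above: with the frames of each clip sorted by name, the loop steps through odd 0-based indices, so every even-numbered frame (1-based) becomes the ground-truth middle frame and its two neighbours become the inputs. The standalone sketch below just reproduces that index arithmetic; none of it is part of the upload.

# Reproduces the index arithmetic of the triplet loop for one 100-frame clip.
NUM_FRAMES = 100
triplets = [(i - 1, i, i + 1) for i in range(1, NUM_FRAMES - 1, 2)]
print(len(triplets))   # 49 triplets per clip, so 392 for the 8 Xiph clips.
print(triplets[:3])    # [(0, 1, 2), (2, 3, 4), (4, 5, 6)] in 0-based indices.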
datasets/util.py
ADDED
@@ -0,0 +1,204 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Utility functions for creating a tf.train.Example proto of image triplets."""
|
16 |
+
|
17 |
+
import io
|
18 |
+
import os
|
19 |
+
from typing import Any, List, Mapping, Optional
|
20 |
+
|
21 |
+
from absl import logging
|
22 |
+
import apache_beam as beam
|
23 |
+
import numpy as np
|
24 |
+
import PIL.Image
|
25 |
+
import six
|
26 |
+
from skimage import transform
|
27 |
+
import tensorflow as tf
|
28 |
+
|
29 |
+
_UINT8_MAX_F = float(np.iinfo(np.uint8).max)
|
30 |
+
_GAMMA = 2.2
|
31 |
+
|
32 |
+
|
33 |
+
def _resample_image(image: np.ndarray, resample_image_width: int,
|
34 |
+
resample_image_height: int) -> np.ndarray:
|
35 |
+
"""Re-samples and returns an `image` to be `resample_image_size`."""
|
36 |
+
# Convert image from uint8 gamma [0..255] to float linear [0..1].
|
37 |
+
image = image.astype(np.float32) / _UINT8_MAX_F
|
38 |
+
image = np.power(np.clip(image, 0, 1), _GAMMA)
|
39 |
+
|
40 |
+
# Re-size the image
|
41 |
+
resample_image_size = (resample_image_height, resample_image_width)
|
42 |
+
image = transform.resize_local_mean(image, resample_image_size)
|
43 |
+
|
44 |
+
# Convert back from float linear [0..1] to uint8 gamma [0..255].
|
45 |
+
image = np.power(np.clip(image, 0, 1), 1.0 / _GAMMA)
|
46 |
+
image = np.clip(image * _UINT8_MAX_F + 0.5, 0.0,
|
47 |
+
_UINT8_MAX_F).astype(np.uint8)
|
48 |
+
return image
|
49 |
+
|
50 |
+
|
51 |
+
def generate_image_triplet_example(
|
52 |
+
triplet_dict: Mapping[str, str],
|
53 |
+
scale_factor: int = 1,
|
54 |
+
center_crop_factor: int = 1) -> Optional[tf.train.Example]:
|
55 |
+
"""Generates and serializes a tf.train.Example proto from an image triplet.
|
56 |
+
|
57 |
+
Default setting creates a triplet Example with the input images unchanged.
|
58 |
+
Images are processed in the order of center-crop then downscale.
|
59 |
+
|
60 |
+
Args:
|
61 |
+
triplet_dict: A dict of image key to filepath of the triplet images.
|
62 |
+
scale_factor: An integer scale factor to isotropically downsample images.
|
63 |
+
center_crop_factor: An integer cropping factor to center crop images with
|
64 |
+
the original resolution but isotropically downsized by the factor.
|
65 |
+
|
66 |
+
Returns:
|
67 |
+
tf.train.Example proto, or None upon error.
|
68 |
+
|
69 |
+
Raises:
|
70 |
+
ValueError if triplet_dict length is different from three or the scale input
|
71 |
+
arguments are non-positive.
|
72 |
+
"""
|
73 |
+
if len(triplet_dict) != 3:
|
74 |
+
raise ValueError(
|
75 |
+
f'Length of triplet_dict must be exactly 3, not {len(triplet_dict)}.')
|
76 |
+
|
77 |
+
if scale_factor <= 0 or center_crop_factor <= 0:
|
78 |
+
raise ValueError(f'(scale_factor, center_crop_factor) must be positive, '
|
79 |
+
f'Not ({scale_factor}, {center_crop_factor}).')
|
80 |
+
|
81 |
+
feature = {}
|
82 |
+
|
83 |
+
# Keep track of the path where the images came from for debugging purposes.
|
84 |
+
mid_frame_path = os.path.dirname(triplet_dict['frame_1'])
|
85 |
+
feature['path'] = tf.train.Feature(
|
86 |
+
bytes_list=tf.train.BytesList(value=[six.ensure_binary(mid_frame_path)]))
|
87 |
+
|
88 |
+
for image_key, image_path in triplet_dict.items():
|
89 |
+
if not tf.io.gfile.exists(image_path):
|
90 |
+
logging.error('File not found: %s', image_path)
|
91 |
+
return None
|
92 |
+
|
93 |
+
# Note: we need both the raw bytes and the image size.
|
94 |
+
# PIL.Image does not expose a method to grab the original bytes.
|
95 |
+
# (Also it is not aware of non-local file systems.)
|
96 |
+
# So we read with tf.io.gfile.GFile to get the bytes, and then wrap the
|
97 |
+
# bytes in BytesIO to let PIL.Image open the image.
|
98 |
+
try:
|
99 |
+
byte_array = tf.io.gfile.GFile(image_path, 'rb').read()
|
100 |
+
except tf.errors.InvalidArgumentError:
|
101 |
+
logging.exception('Cannot read image file: %s', image_path)
|
102 |
+
return None
|
103 |
+
try:
|
104 |
+
pil_image = PIL.Image.open(io.BytesIO(byte_array))
|
105 |
+
except PIL.UnidentifiedImageError:
|
106 |
+
logging.exception('Cannot decode image file: %s', image_path)
|
107 |
+
return None
|
108 |
+
width, height = pil_image.size
|
109 |
+
pil_image_format = pil_image.format
|
110 |
+
|
111 |
+
# Optionally center-crop images and downsize images
|
112 |
+
# by `center_crop_factor`.
|
113 |
+
if center_crop_factor > 1:
|
114 |
+
image = np.array(pil_image)
|
115 |
+
quarter_height = image.shape[0] // (2 * center_crop_factor)
|
116 |
+
quarter_width = image.shape[1] // (2 * center_crop_factor)
|
117 |
+
image = image[quarter_height:-quarter_height,
|
118 |
+
quarter_width:-quarter_width, :]
|
119 |
+
pil_image = PIL.Image.fromarray(image)
|
120 |
+
|
121 |
+
# Update image properties.
|
122 |
+
height, width, _ = image.shape
|
123 |
+
buffer = io.BytesIO()
|
124 |
+
try:
|
125 |
+
pil_image.save(buffer, format='PNG')
|
126 |
+
except OSError:
|
127 |
+
logging.exception('Cannot encode image file: %s', image_path)
|
128 |
+
return None
|
129 |
+
byte_array = buffer.getvalue()
|
130 |
+
|
131 |
+
# Optionally downsample images by `scale_factor`.
|
132 |
+
if scale_factor > 1:
|
133 |
+
image = np.array(pil_image)
|
134 |
+
image = _resample_image(image, image.shape[1] // scale_factor,
|
135 |
+
image.shape[0] // scale_factor)
|
136 |
+
pil_image = PIL.Image.fromarray(image)
|
137 |
+
|
138 |
+
# Update image properties.
|
139 |
+
height, width, _ = image.shape
|
140 |
+
buffer = io.BytesIO()
|
141 |
+
try:
|
142 |
+
pil_image.save(buffer, format='PNG')
|
143 |
+
except OSError:
|
144 |
+
logging.exception('Cannot encode image file: %s', image_path)
|
145 |
+
return None
|
146 |
+
byte_array = buffer.getvalue()
|
147 |
+
|
148 |
+
# Create tf Features.
|
149 |
+
image_feature = tf.train.Feature(
|
150 |
+
bytes_list=tf.train.BytesList(value=[byte_array]))
|
151 |
+
height_feature = tf.train.Feature(
|
152 |
+
int64_list=tf.train.Int64List(value=[height]))
|
153 |
+
width_feature = tf.train.Feature(
|
154 |
+
int64_list=tf.train.Int64List(value=[width]))
|
155 |
+
encoding = tf.train.Feature(
|
156 |
+
bytes_list=tf.train.BytesList(
|
157 |
+
value=[six.ensure_binary(pil_image_format.lower())]))
|
158 |
+
|
159 |
+
# Update feature map.
|
160 |
+
feature[f'{image_key}/encoded'] = image_feature
|
161 |
+
feature[f'{image_key}/format'] = encoding
|
162 |
+
feature[f'{image_key}/height'] = height_feature
|
163 |
+
feature[f'{image_key}/width'] = width_feature
|
164 |
+
|
165 |
+
# Create tf Example.
|
166 |
+
features = tf.train.Features(feature=feature)
|
167 |
+
example = tf.train.Example(features=features)
|
168 |
+
return example
|
169 |
+
|
170 |
+
|
171 |
+
class ExampleGenerator(beam.DoFn):
|
172 |
+
"""Generate a tf.train.Example per input image triplet filepaths."""
|
173 |
+
|
174 |
+
def __init__(self,
|
175 |
+
images_map: Mapping[str, Any],
|
176 |
+
scale_factor: int = 1,
|
177 |
+
center_crop_factor: int = 1):
|
178 |
+
"""Initializes the map of 3 images to add to each tf.train.Example.
|
179 |
+
|
180 |
+
Args:
|
181 |
+
images_map: Map from image key to image filepath.
|
182 |
+
scale_factor: A scale factor to downsample frames.
|
183 |
+
center_crop_factor: A factor to centercrop and downsize frames.
|
184 |
+
"""
|
185 |
+
super().__init__()
|
186 |
+
self._images_map = images_map
|
187 |
+
self._scale_factor = scale_factor
|
188 |
+
self._center_crop_factor = center_crop_factor
|
189 |
+
|
190 |
+
def process(self, triplet_dict: Mapping[str, str]) -> List[bytes]:
|
191 |
+
"""Generates a serialized tf.train.Example for a triplet of images.
|
192 |
+
|
193 |
+
Args:
|
194 |
+
triplet_dict: A dict of image key to filepath of the triplet images.
|
195 |
+
|
196 |
+
Returns:
|
197 |
+
A serialized tf.train.Example proto. No shuffling is applied.
|
198 |
+
"""
|
199 |
+
example = generate_image_triplet_example(triplet_dict, self._scale_factor,
|
200 |
+
self._center_crop_factor)
|
201 |
+
if example:
|
202 |
+
return [example.SerializeToString()]
|
203 |
+
else:
|
204 |
+
return []
|
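generate_image_triplet_example() can also be called outside of Beam, which is convenient for spot-checking a few triplets before launching a full pipeline. A minimal sketch under the assumption that the repository root is on PYTHONPATH; the triplet paths and the output filename are placeholders.

import tensorflow as tf
from frame_interpolation.datasets import util

# Placeholder filepaths of one triplet; replace with real frames.
triplet = {
    'frame_0': '/tmp/triplet/im1.png',
    'frame_1': '/tmp/triplet/im2.png',
    'frame_2': '/tmp/triplet/im3.png',
}

example = util.generate_image_triplet_example(
    triplet, scale_factor=1, center_crop_factor=1)
if example is not None:
  with tf.io.TFRecordWriter('/tmp/triplet_check.tfrecord') as writer:
    writer.write(example.SerializeToString())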
eval/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
eval/config/middlebury.gin
ADDED
@@ -0,0 +1,18 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
experiment.name = 'middlebury'
|
16 |
+
evaluation.max_examples = -1
|
17 |
+
evaluation.metrics = ['l1', 'l2', 'ssim', 'psnr']
|
18 |
+
evaluation.tfrecord = 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/middlebury_other.tfrecord@3'
|
eval/config/ucf101.gin
ADDED
@@ -0,0 +1,18 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
experiment.name = 'ucf101'
|
16 |
+
evaluation.max_examples = -1
|
17 |
+
evaluation.metrics = ['l1', 'l2', 'ssim', 'psnr']
|
18 |
+
evaluation.tfrecord = 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/UCF101_interp_test.tfrecord@2'
|
eval/config/vimeo_90K.gin
ADDED
@@ -0,0 +1,18 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
experiment.name = 'vimeo_90K'
|
16 |
+
evaluation.max_examples = -1
|
17 |
+
evaluation.metrics = ['l1', 'l2', 'ssim', 'psnr']
|
18 |
+
evaluation.tfrecord = 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/vimeo_interp_test.tfrecord@3'
|
eval/config/xiph_2K.gin
ADDED
@@ -0,0 +1,18 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
experiment.name = 'xiph_2K'
|
16 |
+
evaluation.max_examples = -1
|
17 |
+
evaluation.metrics = ['l1', 'l2', 'ssim', 'psnr']
|
18 |
+
evaluation.tfrecord = 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/xiph_2K.tfrecord@2'
|
eval/config/xiph_4K.gin
ADDED
@@ -0,0 +1,18 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
experiment.name = 'xiph_4K'
|
16 |
+
evaluation.max_examples = -1
|
17 |
+
evaluation.metrics = ['l1', 'l2', 'ssim', 'psnr']
|
18 |
+
evaluation.tfrecord = 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/xiph_4K.tfrecord@2'
|
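The evaluation.tfrecord values in these configs point at a private gs:// bucket and use a '<filepath>@<num_shards>' suffix matching the sharded output of the dataset scripts above. To evaluate on locally generated shards, copy one of the .gin files and change that binding, or apply equivalent bindings programmatically with gin, as in the illustrative sketch below (the local path is a placeholder).

import gin.tf

# Same bindings as eval/config/vimeo_90K.gin, but pointing at local shards.
gin.parse_config_files_and_bindings(
    config_files=[],
    bindings=[
        "experiment.name = 'vimeo_90K'",
        "evaluation.max_examples = -1",
        "evaluation.metrics = ['l1', 'l2', 'ssim', 'psnr']",
        "evaluation.tfrecord = '/tmp/vimeo_interp_test.tfrecord@3'",
    ],
    skip_unknown=True)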
eval/eval_cli.py
ADDED
@@ -0,0 +1,216 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
r"""Evaluate the frame interpolation model from a tfrecord and store results.
|
16 |
+
|
17 |
+
This script runs the inference on examples in a tfrecord and generates images
|
18 |
+
and numeric results according to the gin config. For details, see the
|
19 |
+
run_evaluation() function below.
|
20 |
+
|
21 |
+
Usage example:
|
22 |
+
python3 -m frame_interpolation.eval.eval_cli -- \
|
23 |
+
--gin_config <path to eval_dataset.gin> \
|
24 |
+
--base_folder <the root directory to all training sessions> \
|
25 |
+
--label < the foldername of the training session>
|
26 |
+
|
27 |
+
or
|
28 |
+
|
29 |
+
python3 -m frame_interpolation.eval.eval_cli -- \
|
30 |
+
--gin_config <path to eval_dataset.gin> \
|
31 |
+
--model_path <The filepath of the TF2 saved model>
|
32 |
+
|
33 |
+
The output is saved at the parent directory of the `model_path`:
|
34 |
+
<parent directory of model_path>/batch_eval.
|
35 |
+
|
36 |
+
The evaluation is run on a GPU by default. Add the `--mode` argument for others.
|
37 |
+
"""
|
38 |
+
import collections
|
39 |
+
import os
|
40 |
+
from typing import Any, Dict
|
41 |
+
|
42 |
+
from . import util
|
43 |
+
from absl import app
|
44 |
+
from absl import flags
|
45 |
+
from absl import logging
|
46 |
+
import gin.tf
|
47 |
+
from ..losses import losses
|
48 |
+
import numpy as np
|
49 |
+
import tensorflow as tf
|
50 |
+
from ..training import data_lib
|
51 |
+
|
52 |
+
|
53 |
+
_GIN_CONFIG = flags.DEFINE_string('gin_config', None, 'Gin config file.')
|
54 |
+
_LABEL = flags.DEFINE_string(
|
55 |
+
'label', None, 'Descriptive label for the training session to eval.')
|
56 |
+
_BASE_FOLDER = flags.DEFINE_string('base_folder', None,
|
57 |
+
'Root folder of training sessions.')
|
58 |
+
_MODEL_PATH = flags.DEFINE_string(
|
59 |
+
name='model_path',
|
60 |
+
default=None,
|
61 |
+
help='The path of the TF2 saved model to use. If _MODEL_PATH argument is '
|
62 |
+
'directly specified, _LABEL and _BASE_FOLDER arguments will be ignored.')
|
63 |
+
_OUTPUT_FRAMES = flags.DEFINE_boolean(
|
64 |
+
name='output_frames',
|
65 |
+
default=False,
|
66 |
+
help='If true, saves the inputs, ground-truth and interpolated frames.')
|
67 |
+
_MODE = flags.DEFINE_enum('mode', 'gpu', ['cpu', 'gpu'],
|
68 |
+
'Device to run evaluations.')
|
69 |
+
|
70 |
+
|
71 |
+
@gin.configurable('experiment')
|
72 |
+
def _get_experiment_config(name) -> Dict[str, Any]:
|
73 |
+
"""Fetches the gin config."""
|
74 |
+
return {
|
75 |
+
'name': name,
|
76 |
+
}
|
77 |
+
|
78 |
+
|
79 |
+
def _set_visible_devices():
|
80 |
+
"""Set the visible devices according to running mode."""
|
81 |
+
mode_devices = tf.config.list_physical_devices(_MODE.value.upper())
|
82 |
+
tf.config.set_visible_devices([], 'GPU')
|
83 |
+
tf.config.set_visible_devices([], 'TPU')
|
84 |
+
tf.config.set_visible_devices(mode_devices, _MODE.value.upper())
|
85 |
+
return
|
86 |
+
|
87 |
+
|
88 |
+
@gin.configurable('evaluation')
|
89 |
+
def run_evaluation(model_path, tfrecord, output_dir, max_examples, metrics):
|
90 |
+
"""Runs the eval loop for examples in the tfrecord.
|
91 |
+
|
92 |
+
The evaluation is run for the first 'max_examples' number of examples, and
|
93 |
+
resulting images are stored into the given output_dir. Any tensor that
|
94 |
+
appears like an image is stored with its name -- this may include intermediate
|
95 |
+
results, depending on what the model outputs.
|
96 |
+
|
97 |
+
Additionally, numeric results are stored into results.csv file within the same
|
98 |
+
directory. This includes per-example metrics and the mean across the whole
|
99 |
+
dataset.
|
100 |
+
|
101 |
+
Args:
|
102 |
+
model_path: Directory TF2 saved model.
|
103 |
+
tfrecord: Directory to the tfrecord eval data.
|
104 |
+
output_dir: Directory to store the results into.
|
105 |
+
max_examples: Maximum examples to evaluate.
|
106 |
+
metrics: The names of loss functions to use.
|
107 |
+
"""
|
108 |
+
model = tf.saved_model.load(model_path)
|
109 |
+
|
110 |
+
# Store a 'readme.txt' that contains information on where the data came from.
|
111 |
+
with tf.io.gfile.GFile(os.path.join(output_dir, 'readme.txt'), mode='w') as f:
|
112 |
+
print('Results for:', file=f)
|
113 |
+
print(f' model: {model_path}', file=f)
|
114 |
+
print(f' tfrecord: {tfrecord}', file=f)
|
115 |
+
|
116 |
+
with tf.io.gfile.GFile(
|
117 |
+
os.path.join(output_dir, 'results.csv'), mode='w') as csv_file:
|
118 |
+
test_losses = losses.test_losses(metrics, [
|
119 |
+
1.0,
|
120 |
+
] * len(metrics))
|
121 |
+
title_row = ['key'] + list(test_losses)
|
122 |
+
print(', '.join(title_row), file=csv_file)
|
123 |
+
|
124 |
+
datasets = data_lib.create_eval_datasets(
|
125 |
+
batch_size=1,
|
126 |
+
files=[tfrecord],
|
127 |
+
names=[os.path.basename(output_dir)],
|
128 |
+
max_examples=max_examples)
|
129 |
+
dataset = datasets[os.path.basename(output_dir)]
|
130 |
+
|
131 |
+
all_losses = collections.defaultdict(list)
|
132 |
+
for example in dataset:
|
133 |
+
inputs = {
|
134 |
+
'x0': example['x0'],
|
135 |
+
'x1': example['x1'],
|
136 |
+
'time': example['time'][..., tf.newaxis],
|
137 |
+
}
|
138 |
+
prediction = model(inputs, training=False)
|
139 |
+
|
140 |
+
# Get the key from encoded mid-frame path.
|
141 |
+
path = example['path'][0].numpy().decode('utf-8')
|
142 |
+
key = path.rsplit('.', 1)[0].rsplit(os.sep)[-1]
|
143 |
+
|
144 |
+
# Combines both inputs and outputs into a single dictionary:
|
145 |
+
combined = {**prediction, **example} if _OUTPUT_FRAMES.value else {}
|
146 |
+
for name in combined:
|
147 |
+
image = combined[name]
|
148 |
+
if isinstance(image, tf.Tensor):
|
149 |
+
# This saves any tensor that has a shape that can be interpreted
|
150 |
+
# as an image, e.g. (1, H, W, C), where the batch dimension is always
|
151 |
+
# 1, H and W are the image height and width, and C is either 1 or 3
|
152 |
+
# (grayscale or color image).
|
153 |
+
if len(image.shape) == 4 and (image.shape[-1] == 1 or
|
154 |
+
image.shape[-1] == 3):
|
155 |
+
util.write_image(
|
156 |
+
os.path.join(output_dir, f'{key}_{name}.png'), image[0].numpy())
|
157 |
+
|
158 |
+
# Evaluate losses if the dataset has ground truth 'y', otherwise just do
|
159 |
+
# a visual eval.
|
160 |
+
if 'y' in example:
|
161 |
+
loss_values = []
|
162 |
+
# Clip interpolator output to the range [0,1]. Clipping is done only
|
163 |
+
# on the eval loop to get better metrics, but not on the training loop
|
164 |
+
# so gradients are not killed.
|
165 |
+
prediction['image'] = tf.clip_by_value(prediction['image'], 0., 1.)
|
166 |
+
for loss_name, (loss_value_fn, loss_weight_fn) in test_losses.items():
|
167 |
+
loss_value = loss_value_fn(example, prediction) * loss_weight_fn(0)
|
168 |
+
loss_values.append(loss_value.numpy())
|
169 |
+
all_losses[loss_name].append(loss_value.numpy())
|
170 |
+
print(f'{key}, {str(loss_values)[1:-1]}', file=csv_file)
|
171 |
+
|
172 |
+
if all_losses:
|
173 |
+
totals = [np.mean(all_losses[loss_name]) for loss_name in test_losses]
|
174 |
+
print(f'mean, {str(totals)[1:-1]}', file=csv_file)
|
175 |
+
totals_dict = {
|
176 |
+
loss_name: np.mean(all_losses[loss_name]) for loss_name in test_losses
|
177 |
+
}
|
178 |
+
logging.info('mean, %s', totals_dict)
|
179 |
+
|
180 |
+
|
181 |
+
def main(argv):
|
182 |
+
if len(argv) > 1:
|
183 |
+
raise app.UsageError('Too many command-line arguments.')
|
184 |
+
|
185 |
+
if _MODEL_PATH.value is not None:
|
186 |
+
model_path = _MODEL_PATH.value
|
187 |
+
else:
|
188 |
+
model_path = os.path.join(_BASE_FOLDER.value, _LABEL.value, 'saved_model')
|
189 |
+
|
190 |
+
gin.parse_config_files_and_bindings(
|
191 |
+
config_files=[_GIN_CONFIG.value],
|
192 |
+
bindings=None,
|
193 |
+
skip_unknown=True)
|
194 |
+
|
195 |
+
config = _get_experiment_config() # pylint: disable=no-value-for-parameter
|
196 |
+
eval_name = config['name']
|
197 |
+
output_dir = os.path.join(
|
198 |
+
os.path.dirname(model_path), 'batch_eval', eval_name)
|
199 |
+
logging.info('Creating output_dir @ %s ...', output_dir)
|
200 |
+
|
201 |
+
# Copy config file to <base_folder>/<label>/batch_eval/<eval_name>/config.gin.
|
202 |
+
tf.io.gfile.makedirs(output_dir)
|
203 |
+
tf.io.gfile.copy(
|
204 |
+
_GIN_CONFIG.value, os.path.join(output_dir, 'config.gin'), overwrite=True)
|
205 |
+
|
206 |
+
_set_visible_devices()
|
207 |
+
logging.info('Evaluating %s on %s ...', eval_name, [
|
208 |
+
el.name.split('/physical_device:')[-1]
|
209 |
+
for el in tf.config.get_visible_devices()
|
210 |
+
])
|
211 |
+
run_evaluation(model_path=model_path, output_dir=output_dir) # pylint: disable=no-value-for-parameter
|
212 |
+
|
213 |
+
logging.info('Done. Evaluations saved @ %s.', output_dir)
|
214 |
+
|
215 |
+
if __name__ == '__main__':
|
216 |
+
app.run(main)
|
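The results.csv written by run_evaluation() has a 'key' column, one column per requested metric, and a final 'mean' row. A small sketch for collecting those per-dataset means across several batch_eval runs; the glob pattern below is a placeholder for a real training-session directory.

import csv
import glob
import os

for path in glob.glob('/tmp/training_session/batch_eval/*/results.csv'):
  with open(path, newline='') as f:
    rows = list(csv.reader(f, skipinitialspace=True))
  header, mean_row = rows[0], rows[-1]           # The last row is the 'mean' row.
  metrics = dict(zip(header[1:], mean_row[1:]))
  print(os.path.basename(os.path.dirname(path)), metrics)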
eval/interpolator.py
ADDED
@@ -0,0 +1,209 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""A wrapper class for running a frame interpolation TF2 saved model.
|
16 |
+
|
17 |
+
Usage:
|
18 |
+
model_path='/tmp/saved_model/'
|
19 |
+
it = Interpolator(model_path)
|
20 |
+
result_batch = it.interpolate(image_batch_0, image_batch_1, batch_dt)
|
21 |
+
|
22 |
+
Where image_batch_0 and image_batch_1 are numpy tensors with TF standard
|
23 |
+
(B,H,W,C) layout, batch_dt is the sub-frame time in range [0,1], (B,) layout.
|
24 |
+
"""
|
25 |
+
from typing import List, Optional
|
26 |
+
import numpy as np
|
27 |
+
import tensorflow as tf
|
28 |
+
|
29 |
+
|
30 |
+
def _pad_to_align(x, align):
|
31 |
+
"""Pad image batch x so width and height divide by align.
|
32 |
+
|
33 |
+
Args:
|
34 |
+
x: Image batch to align.
|
35 |
+
align: Number to align to.
|
36 |
+
|
37 |
+
Returns:
|
38 |
+
1) An image padded so width % align == 0 and height % align == 0.
|
39 |
+
2) A bounding box that can be fed readily to tf.image.crop_to_bounding_box
|
40 |
+
to undo the padding.
|
41 |
+
"""
|
42 |
+
# Input checking.
|
43 |
+
assert np.ndim(x) == 4
|
44 |
+
assert align > 0, 'align must be a positive number.'
|
45 |
+
|
46 |
+
height, width = x.shape[-3:-1]
|
47 |
+
height_to_pad = (align - height % align) if height % align != 0 else 0
|
48 |
+
width_to_pad = (align - width % align) if width % align != 0 else 0
|
49 |
+
|
50 |
+
bbox_to_pad = {
|
51 |
+
'offset_height': height_to_pad // 2,
|
52 |
+
'offset_width': width_to_pad // 2,
|
53 |
+
'target_height': height + height_to_pad,
|
54 |
+
'target_width': width + width_to_pad
|
55 |
+
}
|
56 |
+
padded_x = tf.image.pad_to_bounding_box(x, **bbox_to_pad)
|
57 |
+
bbox_to_crop = {
|
58 |
+
'offset_height': height_to_pad // 2,
|
59 |
+
'offset_width': width_to_pad // 2,
|
60 |
+
'target_height': height,
|
61 |
+
'target_width': width
|
62 |
+
}
|
63 |
+
return padded_x, bbox_to_crop
|
64 |
+
|
65 |
+
|
66 |
+
def image_to_patches(image: np.ndarray, block_shape: List[int]) -> np.ndarray:
|
67 |
+
"""Folds an image into patches and stacks along the batch dimension.
|
68 |
+
|
69 |
+
Args:
|
70 |
+
image: The input image of shape [B, H, W, C].
|
71 |
+
block_shape: The number of patches along the height and width to extract.
|
72 |
+
Each patch is shaped (H/block_shape[0], W/block_shape[1])
|
73 |
+
|
74 |
+
Returns:
|
75 |
+
The extracted patches shaped [num_blocks, patch_height, patch_width,...],
|
76 |
+
with num_blocks = block_shape[0] * block_shape[1].
|
77 |
+
"""
|
78 |
+
block_height, block_width = block_shape
|
79 |
+
num_blocks = block_height * block_width
|
80 |
+
|
81 |
+
height, width, channel = image.shape[-3:]
|
82 |
+
patch_height, patch_width = height//block_height, width//block_width
|
83 |
+
|
84 |
+
assert height == (
|
85 |
+
patch_height * block_height
|
86 |
+
), 'block_height=%d should evenly divide height=%d.'%(block_height, height)
|
87 |
+
assert width == (
|
88 |
+
patch_width * block_width
|
89 |
+
), 'block_width=%d should evenly divide width=%d.'%(block_width, width)
|
90 |
+
|
91 |
+
patch_size = patch_height * patch_width
|
92 |
+
paddings = 2*[[0, 0]]
|
93 |
+
|
94 |
+
patches = tf.space_to_batch(image, [patch_height, patch_width], paddings)
|
95 |
+
patches = tf.split(patches, patch_size, 0)
|
96 |
+
patches = tf.stack(patches, axis=3)
|
97 |
+
patches = tf.reshape(patches,
|
98 |
+
[num_blocks, patch_height, patch_width, channel])
|
99 |
+
return patches.numpy()
|
100 |
+
|
101 |
+
|
102 |
+
def patches_to_image(patches: np.ndarray, block_shape: List[int]) -> np.ndarray:
|
103 |
+
"""Unfolds patches (stacked along batch) into an image.
|
104 |
+
|
105 |
+
Args:
|
106 |
+
patches: The input patches, shaped [num_patches, patch_H, patch_W, C].
|
107 |
+
block_shape: The number of patches along the height and width to unfold.
|
108 |
+
Each patch assumed to be shaped (H/block_shape[0], W/block_shape[1]).
|
109 |
+
|
110 |
+
Returns:
|
111 |
+
The unfolded image shaped [B, H, W, C].
|
112 |
+
"""
|
113 |
+
block_height, block_width = block_shape
|
114 |
+
paddings = 2 * [[0, 0]]
|
115 |
+
|
116 |
+
patch_height, patch_width, channel = patches.shape[-3:]
|
117 |
+
patch_size = patch_height * patch_width
|
118 |
+
|
119 |
+
patches = tf.reshape(patches,
|
120 |
+
[1, block_height, block_width, patch_size, channel])
|
121 |
+
patches = tf.split(patches, patch_size, axis=3)
|
122 |
+
patches = tf.stack(patches, axis=0)
|
123 |
+
patches = tf.reshape(patches,
|
124 |
+
[patch_size, block_height, block_width, channel])
|
125 |
+
image = tf.batch_to_space(patches, [patch_height, patch_width], paddings)
|
126 |
+
return image.numpy()
|
127 |
+
|
128 |
+
|
129 |
+
class Interpolator:
|
130 |
+
"""A class for generating interpolated frames between two input frames.
|
131 |
+
|
132 |
+
Uses TF2 saved model format.
|
133 |
+
"""
|
134 |
+
|
135 |
+
def __init__(self, model_path: str,
|
136 |
+
align: Optional[int] = None,
|
137 |
+
block_shape: Optional[List[int]] = None) -> None:
|
138 |
+
"""Loads a saved model.
|
139 |
+
|
140 |
+
Args:
|
141 |
+
model_path: Path to the saved model. If none are provided, uses the
|
142 |
+
default model.
|
143 |
+
align: If >1, pad the input size so it is evenly divisible by this value
|
144 |
+
before inference.
|
145 |
+
block_shape: Number of patches along the (height, width) to sub-divide
|
146 |
+
input images.
|
147 |
+
"""
|
148 |
+
self._model = tf.compat.v2.saved_model.load(model_path)
|
149 |
+
self._align = align or None
|
150 |
+
self._block_shape = block_shape or None
|
151 |
+
|
152 |
+
def interpolate(self, x0: np.ndarray, x1: np.ndarray,
|
153 |
+
dt: np.ndarray) -> np.ndarray:
|
154 |
+
"""Generates an interpolated frame between given two batches of frames.
|
155 |
+
|
156 |
+
All input tensors should be np.float32 datatype.
|
157 |
+
|
158 |
+
Args:
|
159 |
+
x0: First image batch. Dimensions: (batch_size, height, width, channels)
|
160 |
+
x1: Second image batch. Dimensions: (batch_size, height, width, channels)
|
161 |
+
dt: Sub-frame time. Range [0,1]. Dimensions: (batch_size,)
|
162 |
+
|
163 |
+
Returns:
|
164 |
+
The result with dimensions (batch_size, height, width, channels).
|
165 |
+
"""
|
166 |
+
if self._align is not None:
|
167 |
+
x0, bbox_to_crop = _pad_to_align(x0, self._align)
|
168 |
+
x1, _ = _pad_to_align(x1, self._align)
|
169 |
+
|
170 |
+
inputs = {'x0': x0, 'x1': x1, 'time': dt[..., np.newaxis]}
|
171 |
+
result = self._model(inputs, training=False)
|
172 |
+
image = result['image']
|
173 |
+
|
174 |
+
if self._align is not None:
|
175 |
+
image = tf.image.crop_to_bounding_box(image, **bbox_to_crop)
|
176 |
+
return image.numpy()
|
177 |
+
|
178 |
+
def __call__(self, x0: np.ndarray, x1: np.ndarray,
|
179 |
+
dt: np.ndarray) -> np.ndarray:
|
180 |
+
"""Generates an interpolated frame between given two batches of frames.
|
181 |
+
|
182 |
+
All input tensors should be np.float32 datatype.
|
183 |
+
|
184 |
+
Args:
|
185 |
+
x0: First image batch. Dimensions: (batch_size, height, width, channels)
|
186 |
+
x1: Second image batch. Dimensions: (batch_size, height, width, channels)
|
187 |
+
dt: Sub-frame time. Range [0,1]. Dimensions: (batch_size,)
|
188 |
+
|
189 |
+
Returns:
|
190 |
+
The result with dimensions (batch_size, height, width, channels).
|
191 |
+
"""
|
192 |
+
if self._block_shape is not None and np.prod(self._block_shape) > 1:
|
193 |
+
# Subdivide high-res images into manageable non-overlapping patches.
|
194 |
+
x0_patches = image_to_patches(x0, self._block_shape)
|
195 |
+
x1_patches = image_to_patches(x1, self._block_shape)
|
196 |
+
|
197 |
+
# Run the interpolator on each patch pair.
|
198 |
+
output_patches = []
|
199 |
+
for image_0, image_1 in zip(x0_patches, x1_patches):
|
200 |
+
mid_patch = self.interpolate(image_0[np.newaxis, ...],
|
201 |
+
image_1[np.newaxis, ...], dt)
|
202 |
+
output_patches.append(mid_patch)
|
203 |
+
|
204 |
+
# Reconstruct interpolated image by stitching interpolated patches.
|
205 |
+
output_patches = np.concatenate(output_patches, axis=0)
|
206 |
+
return patches_to_image(output_patches, self._block_shape)
|
207 |
+
else:
|
208 |
+
# Invoke the interpolator once.
|
209 |
+
return self.interpolate(x0, x1, dt)
|
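image_to_patches() and patches_to_image() above are intended to be an exact fold/unfold pair whenever the height and width divide evenly by the block shape. A quick round-trip check, assuming the module is importable under the frame_interpolation package as in the docstring usage:

import numpy as np
from frame_interpolation.eval import interpolator as interpolator_lib

# Dummy batch-of-one image whose sides divide evenly by the block shape.
image = np.random.rand(1, 256, 448, 3).astype(np.float32)
block_shape = [2, 4]  # 2 patches along height, 4 along width.

patches = interpolator_lib.image_to_patches(image, block_shape)
restored = interpolator_lib.patches_to_image(patches, block_shape)

print(patches.shape)                 # (8, 128, 112, 3)
print(np.allclose(image, restored))  # True: the fold/unfold is lossless.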
eval/interpolator_cli.py
ADDED
@@ -0,0 +1,197 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
r"""Runs the FILM frame interpolator on a pair of frames on beam.
|
16 |
+
|
17 |
+
This script is used to evaluate the output quality of the FILM Tensorflow frame
|
18 |
+
interpolator. Optionally, it outputs a video of the interpolated frames.
|
19 |
+
|
20 |
+
A beam pipeline for invoking the frame interpolator on a set of directories
|
21 |
+
identified by a glob (--pattern). Each directory is expected to contain two
|
22 |
+
input frames that are the inputs to the frame interpolator. If a directory has
|
23 |
+
more than two frames, then each contiguous frame pair is treated as input to
|
24 |
+
generate in-between frames.
|
25 |
+
|
26 |
+
The output video is stored to interpolator.mp4 in each directory. The number of
|
27 |
+
frames is determined by --times_to_interpolate, which controls the number of
|
28 |
+
times the frame interpolator is invoked. When the number of input frames is 2,
|
29 |
+
the number of output frames is 2^times_to_interpolate+1.
|
30 |
+
|
31 |
+
This expects a directory structure such as:
|
32 |
+
<root directory of the eval>/01/frame1.png
|
33 |
+
frame2.png
|
34 |
+
<root directory of the eval>/02/frame1.png
|
35 |
+
frame2.png
|
36 |
+
<root directory of the eval>/03/frame1.png
|
37 |
+
frame2.png
|
38 |
+
...
|
39 |
+
|
40 |
+
And will produce:
|
41 |
+
<root directory of the eval>/01/interpolated_frames/frame0.png
|
42 |
+
frame1.png
|
43 |
+
frame2.png
|
44 |
+
<root directory of the eval>/02/interpolated_frames/frame0.png
|
45 |
+
frame1.png
|
46 |
+
frame2.png
|
47 |
+
<root directory of the eval>/03/interpolated_frames/frame0.png
|
48 |
+
frame1.png
|
49 |
+
frame2.png
|
50 |
+
...
|
51 |
+
|
52 |
+
And optionally will produce:
|
53 |
+
<root directory of the eval>/01/interpolated.mp4
|
54 |
+
<root directory of the eval>/02/interpolated.mp4
|
55 |
+
<root directory of the eval>/03/interpolated.mp4
|
56 |
+
...
|
57 |
+
|
58 |
+
Usage example:
|
59 |
+
python3 -m frame_interpolation.eval.interpolator_cli \
|
60 |
+
--model_path <path to TF2 saved model> \
|
61 |
+
--pattern "<root directory of the eval>/*" \
|
62 |
+
--times_to_interpolate <Number of times to interpolate>
|
63 |
+
"""
|
64 |
+
|
65 |
+
import functools
|
66 |
+
import os
|
67 |
+
from typing import List, Sequence
|
68 |
+
|
69 |
+
from . import interpolator as interpolator_lib
|
70 |
+
from . import util
|
71 |
+
from absl import app
|
72 |
+
from absl import flags
|
73 |
+
from absl import logging
|
74 |
+
import apache_beam as beam
|
75 |
+
import mediapy as media
|
76 |
+
import natsort
|
77 |
+
import numpy as np
|
78 |
+
import tensorflow as tf
|
79 |
+
from tqdm.auto import tqdm
|
80 |
+
|
81 |
+
# Controls TF_CPP log level.
|
82 |
+
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
|
83 |
+
|
84 |
+
|
85 |
+
_PATTERN = flags.DEFINE_string(
|
86 |
+
name='pattern',
|
87 |
+
default=None,
|
88 |
+
help='The pattern to determine the directories with the input frames.',
|
89 |
+
required=True)
|
90 |
+
_MODEL_PATH = flags.DEFINE_string(
|
91 |
+
name='model_path',
|
92 |
+
default=None,
|
93 |
+
help='The path of the TF2 saved model to use.')
|
94 |
+
_TIMES_TO_INTERPOLATE = flags.DEFINE_integer(
|
95 |
+
name='times_to_interpolate',
|
96 |
+
default=5,
|
97 |
+
help='The number of times to run recursive midpoint interpolation. '
|
98 |
+
'The number of output frames will be 2^times_to_interpolate+1.')
|
99 |
+
_FPS = flags.DEFINE_integer(
|
100 |
+
name='fps',
|
101 |
+
default=30,
|
102 |
+
help='Frames per second to play interpolated videos in slow motion.')
|
103 |
+
_ALIGN = flags.DEFINE_integer(
|
104 |
+
name='align',
|
105 |
+
default=64,
|
106 |
+
help='If >1, pad the input size so it is evenly divisible by this value.')
|
107 |
+
_BLOCK_HEIGHT = flags.DEFINE_integer(
|
108 |
+
name='block_height',
|
109 |
+
default=1,
|
110 |
+
help='An int >= 1, number of patches along height, '
|
111 |
+
'patch_height = height//block_height, should be evenly divisible.')
|
112 |
+
_BLOCK_WIDTH = flags.DEFINE_integer(
|
113 |
+
name='block_width',
|
114 |
+
default=1,
|
115 |
+
help='An int >= 1, number of patches along width, '
|
116 |
+
'patch_width = width//block_width, should be evenly divisible.')
|
117 |
+
_OUTPUT_VIDEO = flags.DEFINE_boolean(
|
118 |
+
name='output_video',
|
119 |
+
default=False,
|
120 |
+
help='If true, creates a video of the frames in the interpolated_frames/ '
|
121 |
+
'subdirectory')
|
122 |
+
|
123 |
+
# Add other input extensions here, if needed.
|
124 |
+
_INPUT_EXT = ['png', 'jpg', 'jpeg']
|
125 |
+
|
126 |
+
|
127 |
+
def _output_frames(frames: List[np.ndarray], frames_dir: str):
|
128 |
+
"""Writes PNG-images to a directory.
|
129 |
+
|
130 |
+
If frames_dir doesn't exist, it is created. If frames_dir contains existing
|
131 |
+
PNG-files, they are removed before saving the new ones.
|
132 |
+
|
133 |
+
Args:
|
134 |
+
frames: List of images to save.
|
135 |
+
frames_dir: The output directory to save the images.
|
136 |
+
|
137 |
+
"""
|
138 |
+
if tf.io.gfile.isdir(frames_dir):
|
139 |
+
old_frames = tf.io.gfile.glob(f'{frames_dir}/frame_*.png')
|
140 |
+
if old_frames:
|
141 |
+
logging.info('Removing existing frames from %s.', frames_dir)
|
142 |
+
for old_frame in old_frames:
|
143 |
+
tf.io.gfile.remove(old_frame)
|
144 |
+
else:
|
145 |
+
tf.io.gfile.makedirs(frames_dir)
|
146 |
+
for idx, frame in tqdm(
|
147 |
+
enumerate(frames), total=len(frames), ncols=100, colour='green'):
|
148 |
+
util.write_image(f'{frames_dir}/frame_{idx:03d}.png', frame)
|
149 |
+
logging.info('Output frames saved in %s.', frames_dir)
|
150 |
+
|
151 |
+
|
152 |
+
class ProcessDirectory(beam.DoFn):
|
153 |
+
"""DoFn for running the interpolator on a single directory at the time."""
|
154 |
+
|
155 |
+
def setup(self):
|
156 |
+
self.interpolator = interpolator_lib.Interpolator(
|
157 |
+
_MODEL_PATH.value, _ALIGN.value,
|
158 |
+
[_BLOCK_HEIGHT.value, _BLOCK_WIDTH.value])
|
159 |
+
|
160 |
+
if _OUTPUT_VIDEO.value:
|
161 |
+
ffmpeg_path = util.get_ffmpeg_path()
|
162 |
+
media.set_ffmpeg(ffmpeg_path)
|
163 |
+
|
164 |
+
def process(self, directory: str):
|
165 |
+
input_frames_list = [
|
166 |
+
natsort.natsorted(tf.io.gfile.glob(f'{directory}/*.{ext}'))
|
167 |
+
for ext in _INPUT_EXT
|
168 |
+
]
|
169 |
+
input_frames = functools.reduce(lambda x, y: x + y, input_frames_list)
|
170 |
+
logging.info('Generating in-between frames for %s.', directory)
|
171 |
+
frames = list(
|
172 |
+
util.interpolate_recursively_from_files(
|
173 |
+
input_frames, _TIMES_TO_INTERPOLATE.value, self.interpolator))
|
174 |
+
_output_frames(frames, f'{directory}/interpolated_frames')
|
175 |
+
if _OUTPUT_VIDEO.value:
|
176 |
+
media.write_video(f'{directory}/interpolated.mp4', frames, fps=_FPS.value)
|
177 |
+
logging.info('Output video saved at %s/interpolated.mp4.', directory)
|
178 |
+
|
179 |
+
|
180 |
+
def _run_pipeline() -> None:
|
181 |
+
directories = tf.io.gfile.glob(_PATTERN.value)
|
182 |
+
pipeline = beam.Pipeline('DirectRunner')
|
183 |
+
(pipeline | 'Create directory names' >> beam.Create(directories) # pylint: disable=expression-not-assigned
|
184 |
+
| 'Process directories' >> beam.ParDo(ProcessDirectory()))
|
185 |
+
|
186 |
+
result = pipeline.run()
|
187 |
+
result.wait_until_finish()
|
188 |
+
|
189 |
+
|
190 |
+
def main(argv: Sequence[str]) -> None:
|
191 |
+
if len(argv) > 1:
|
192 |
+
raise app.UsageError('Too many command-line arguments.')
|
193 |
+
_run_pipeline()
|
194 |
+
|
195 |
+
|
196 |
+
if __name__ == '__main__':
|
197 |
+
app.run(main)
|
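On the frame-count statement in the docstring above: each interpolation pass inserts a midpoint between every adjacent pair, so a directory with n input frames yields (n - 1) * 2**t + 1 output frames after t passes, which reduces to 2**t + 1 for the two-frame case. A one-line sanity check:

def num_output_frames(num_input_frames: int, times_to_interpolate: int) -> int:
  """Frames produced by recursive midpoint interpolation over a frame list."""
  return (num_input_frames - 1) * 2**times_to_interpolate + 1

print(num_output_frames(2, 5))   # 33, i.e. 2**5 + 1 as stated in the docstring.
print(num_output_frames(10, 3))  # 73 for a directory with ten input frames.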
eval/interpolator_test.py
ADDED
@@ -0,0 +1,109 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
r"""A test script for mid frame interpolation from two input frames.
|
16 |
+
|
17 |
+
Usage example:
|
18 |
+
python3 -m frame_interpolation.eval.interpolator_test \
|
19 |
+
--frame1 <filepath of the first frame> \
|
20 |
+
--frame2 <filepath of the second frame> \
|
21 |
+
--model_path <The filepath of the TF2 saved model to use>
|
22 |
+
|
23 |
+
The output is saved to <the directory of the input frames>/output_frame.png. If
|
24 |
+
a `--output_frame` filepath is provided, it will be used instead.
|
25 |
+
"""
|
26 |
+
import os
|
27 |
+
from typing import Sequence
|
28 |
+
|
29 |
+
from . import interpolator as interpolator_lib
|
30 |
+
from . import util
|
31 |
+
from absl import app
|
32 |
+
from absl import flags
|
33 |
+
import numpy as np
|
34 |
+
|
35 |
+
# Controls TF_CPP log level.
|
36 |
+
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
|
37 |
+
|
38 |
+
|
39 |
+
_FRAME1 = flags.DEFINE_string(
|
40 |
+
name='frame1',
|
41 |
+
default=None,
|
42 |
+
help='The filepath of the first input frame.',
|
43 |
+
required=True)
|
44 |
+
_FRAME2 = flags.DEFINE_string(
|
45 |
+
name='frame2',
|
46 |
+
default=None,
|
47 |
+
help='The filepath of the second input frame.',
|
48 |
+
required=True)
|
49 |
+
_MODEL_PATH = flags.DEFINE_string(
|
50 |
+
name='model_path',
|
51 |
+
default=None,
|
52 |
+
help='The path of the TF2 saved model to use.')
|
53 |
+
_OUTPUT_FRAME = flags.DEFINE_string(
|
54 |
+
name='output_frame',
|
55 |
+
default=None,
|
56 |
+
help='The output filepath of the interpolated mid-frame.')
|
57 |
+
_ALIGN = flags.DEFINE_integer(
|
58 |
+
name='align',
|
59 |
+
default=64,
|
60 |
+
help='If >1, pad the input size so it is evenly divisible by this value.')
|
61 |
+
_BLOCK_HEIGHT = flags.DEFINE_integer(
|
62 |
+
name='block_height',
|
63 |
+
default=1,
|
64 |
+
help='An int >= 1, number of patches along height, '
|
65 |
+
'patch_height = height//block_height, should be evenly divisible.')
|
66 |
+
_BLOCK_WIDTH = flags.DEFINE_integer(
|
67 |
+
name='block_width',
|
68 |
+
default=1,
|
69 |
+
help='An int >= 1, number of patches along width, '
|
70 |
+
'patch_width = width//block_width, should be evenly divisible.')
|
71 |
+
|
72 |
+
|
73 |
+
def _run_interpolator() -> None:
|
74 |
+
"""Writes interpolated mid frame from a given two input frame filepaths."""
|
75 |
+
|
76 |
+
interpolator = interpolator_lib.Interpolator(
|
77 |
+
model_path=_MODEL_PATH.value,
|
78 |
+
align=_ALIGN.value,
|
79 |
+
block_shape=[_BLOCK_HEIGHT.value, _BLOCK_WIDTH.value])
|
80 |
+
|
81 |
+
# First batched image.
|
82 |
+
image_1 = util.read_image(_FRAME1.value)
|
83 |
+
image_batch_1 = np.expand_dims(image_1, axis=0)
|
84 |
+
|
85 |
+
# Second batched image.
|
86 |
+
image_2 = util.read_image(_FRAME2.value)
|
87 |
+
image_batch_2 = np.expand_dims(image_2, axis=0)
|
88 |
+
|
89 |
+
# Batched time.
|
90 |
+
batch_dt = np.full(shape=(1,), fill_value=0.5, dtype=np.float32)
|
91 |
+
|
92 |
+
# Invoke the model for one mid-frame interpolation.
|
93 |
+
mid_frame = interpolator(image_batch_1, image_batch_2, batch_dt)[0]
|
94 |
+
|
95 |
+
# Write interpolated mid-frame.
|
96 |
+
mid_frame_filepath = _OUTPUT_FRAME.value
|
97 |
+
if not mid_frame_filepath:
|
98 |
+
mid_frame_filepath = f'{os.path.dirname(_FRAME1.value)}/output_frame.png'
|
99 |
+
util.write_image(mid_frame_filepath, mid_frame)
|
100 |
+
|
101 |
+
|
102 |
+
def main(argv: Sequence[str]) -> None:
|
103 |
+
if len(argv) > 1:
|
104 |
+
raise app.UsageError('Too many command-line arguments.')
|
105 |
+
_run_interpolator()
|
106 |
+
|
107 |
+
|
108 |
+
if __name__ == '__main__':
|
109 |
+
app.run(main)
|
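The call pattern in `_run_interpolator()` also extends to more than one image pair per invocation: stack the frames along the batch axis and pass one time value per pair. A small sketch, under the assumption that the exported saved model accepts batch sizes larger than one and that all frames share the same resolution; file names and the model path are placeholders.

```python
# Sketch only: batch two image pairs into a single interpolator call.
import numpy as np

from frame_interpolation.eval import interpolator as interpolator_lib
from frame_interpolation.eval import util

interpolator = interpolator_lib.Interpolator(
    model_path='<path to the TF2 saved model>',  # placeholder
    align=64,
    block_shape=[1, 1])

# Two pairs (a0, a1) and (b0, b1); every frame must have identical H x W.
a0, a1 = util.read_image('a0.png'), util.read_image('a1.png')  # placeholders
b0, b1 = util.read_image('b0.png'), util.read_image('b1.png')  # placeholders

batch_x0 = np.stack([a0, b0], axis=0)                    # shape (2, H, W, 3)
batch_x1 = np.stack([a1, b1], axis=0)
batch_dt = np.full(shape=(2,), fill_value=0.5, dtype=np.float32)

mid_frames = interpolator(batch_x0, batch_x1, batch_dt)  # shape (2, H, W, 3)
util.write_image('a_mid.png', mid_frames[0])
util.write_image('b_mid.png', mid_frames[1])
```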
eval/util.py
ADDED
@@ -0,0 +1,162 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Utility functions for frame interpolation on a set of video frames."""
|
16 |
+
import os
|
17 |
+
import shutil
|
18 |
+
from typing import Generator, Iterable, List, Optional
|
19 |
+
|
20 |
+
from . import interpolator as interpolator_lib
|
21 |
+
import numpy as np
|
22 |
+
import tensorflow as tf
|
23 |
+
from tqdm import tqdm
|
24 |
+
|
25 |
+
_UINT8_MAX_F = float(np.iinfo(np.uint8).max)
|
26 |
+
_CONFIG_FFMPEG_NAME_OR_PATH = 'ffmpeg'
|
27 |
+
|
28 |
+
|
29 |
+
def read_image(filename: str) -> np.ndarray:
|
30 |
+
"""Reads an sRgb 8-bit image.
|
31 |
+
|
32 |
+
Args:
|
33 |
+
filename: The input filename to read.
|
34 |
+
|
35 |
+
Returns:
|
36 |
+
A float32 3-channel (RGB) ndarray with colors in the [0..1] range.
|
37 |
+
"""
|
38 |
+
image_data = tf.io.read_file(filename)
|
39 |
+
image = tf.io.decode_image(image_data, channels=3)
|
40 |
+
image_numpy = tf.cast(image, dtype=tf.float32).numpy()
|
41 |
+
return image_numpy / _UINT8_MAX_F
|
42 |
+
|
43 |
+
|
44 |
+
def write_image(filename: str, image: np.ndarray) -> None:
|
45 |
+
"""Writes a float32 3-channel RGB ndarray image, with colors in range [0..1].
|
46 |
+
|
47 |
+
Args:
|
48 |
+
filename: The output filename to save.
|
49 |
+
image: A float32 3-channel (RGB) ndarray with colors in the [0..1] range.
|
50 |
+
"""
|
51 |
+
image_in_uint8_range = np.clip(image * _UINT8_MAX_F, 0.0, _UINT8_MAX_F)
|
52 |
+
image_in_uint8 = (image_in_uint8_range + 0.5).astype(np.uint8)
|
53 |
+
|
54 |
+
extension = os.path.splitext(filename)[1]
|
55 |
+
if extension == '.jpg':
|
56 |
+
image_data = tf.io.encode_jpeg(image_in_uint8)
|
57 |
+
else:
|
58 |
+
image_data = tf.io.encode_png(image_in_uint8)
|
59 |
+
tf.io.write_file(filename, image_data)
|
60 |
+
|
61 |
+
|
62 |
+
def _recursive_generator(
|
63 |
+
frame1: np.ndarray, frame2: np.ndarray, num_recursions: int,
|
64 |
+
interpolator: interpolator_lib.Interpolator,
|
65 |
+
bar: Optional[tqdm] = None
|
66 |
+
) -> Generator[np.ndarray, None, None]:
|
67 |
+
"""Splits halfway to repeatedly generate more frames.
|
68 |
+
|
69 |
+
Args:
|
70 |
+
frame1: Input image 1.
|
71 |
+
frame2: Input image 2.
|
72 |
+
num_recursions: How many times to interpolate the consecutive image pairs.
|
73 |
+
interpolator: The frame interpolator instance.
|
74 |
+
|
75 |
+
Yields:
|
76 |
+
The interpolated frames, including the first frame (frame1), but excluding
|
77 |
+
the final frame2.
|
78 |
+
"""
|
79 |
+
if num_recursions == 0:
|
80 |
+
yield frame1
|
81 |
+
else:
|
82 |
+
# Adds the batch dimension to all inputs before calling the interpolator,
|
83 |
+
# and removes it afterwards.
|
84 |
+
time = np.full(shape=(1,), fill_value=0.5, dtype=np.float32)
|
85 |
+
mid_frame = interpolator(frame1[np.newaxis, ...], frame2[np.newaxis, ...],
|
86 |
+
time)[0]
|
87 |
+
bar.update(1) if bar is not None else bar
|
88 |
+
yield from _recursive_generator(frame1, mid_frame, num_recursions - 1,
|
89 |
+
interpolator, bar)
|
90 |
+
yield from _recursive_generator(mid_frame, frame2, num_recursions - 1,
|
91 |
+
interpolator, bar)
|
92 |
+
|
93 |
+
|
94 |
+
def interpolate_recursively_from_files(
|
95 |
+
frames: List[str], times_to_interpolate: int,
|
96 |
+
interpolator: interpolator_lib.Interpolator) -> Iterable[np.ndarray]:
|
97 |
+
"""Generates interpolated frames by repeatedly interpolating the midpoint.
|
98 |
+
|
99 |
+
Loads the files on demand and uses the yield paradigm to return the frames
|
100 |
+
to allow streamed processing of longer videos.
|
101 |
+
|
102 |
+
Recursive interpolation is useful if the interpolator is trained to predict
|
103 |
+
frames at midpoint only and is thus expected to perform poorly elsewhere.
|
104 |
+
|
105 |
+
Args:
|
106 |
+
frames: List of input frames. Expected shape (H, W, 3). The colors should be
|
107 |
+
in the range [0, 1] and in gamma space.
|
108 |
+
times_to_interpolate: Number of times to do recursive midpoint
|
109 |
+
interpolation.
|
110 |
+
interpolator: The frame interpolation model to use.
|
111 |
+
|
112 |
+
Yields:
|
113 |
+
The interpolated frames (including the inputs).
|
114 |
+
"""
|
115 |
+
n = len(frames)
|
116 |
+
num_frames = (n - 1) * (2**(times_to_interpolate) - 1)
|
117 |
+
bar = tqdm(total=num_frames, ncols=100, colour='green')
|
118 |
+
for i in range(1, n):
|
119 |
+
yield from _recursive_generator(
|
120 |
+
read_image(frames[i - 1]), read_image(frames[i]), times_to_interpolate,
|
121 |
+
interpolator, bar)
|
122 |
+
# Separately yield the final frame.
|
123 |
+
yield read_image(frames[-1])
|
124 |
+
|
125 |
+
def interpolate_recursively_from_memory(
|
126 |
+
frames: List[np.ndarray], times_to_interpolate: int,
|
127 |
+
interpolator: interpolator_lib.Interpolator) -> Iterable[np.ndarray]:
|
128 |
+
"""Generates interpolated frames by repeatedly interpolating the midpoint.
|
129 |
+
|
130 |
+
This is functionally equivalent to interpolate_recursively_from_files(), but
|
131 |
+
expects the input frames in memory, instead of loading them on demand.
|
132 |
+
|
133 |
+
Recursive interpolation is useful if the interpolator is trained to predict
|
134 |
+
frames at midpoint only and is thus expected to perform poorly elsewhere.
|
135 |
+
|
136 |
+
Args:
|
137 |
+
frames: List of input frames. Expected shape (H, W, 3). The colors should be
|
138 |
+
in the range [0, 1] and in gamma space.
|
139 |
+
times_to_interpolate: Number of times to do recursive midpoint
|
140 |
+
interpolation.
|
141 |
+
interpolator: The frame interpolation model to use.
|
142 |
+
|
143 |
+
Yields:
|
144 |
+
The interpolated frames (including the inputs).
|
145 |
+
"""
|
146 |
+
n = len(frames)
|
147 |
+
num_frames = (n - 1) * (2**(times_to_interpolate) - 1)
|
148 |
+
bar = tqdm(total=num_frames, ncols=100, colour='green')
|
149 |
+
for i in range(1, n):
|
150 |
+
yield from _recursive_generator(frames[i - 1], frames[i],
|
151 |
+
times_to_interpolate, interpolator, bar)
|
152 |
+
# Separately yield the final frame.
|
153 |
+
yield frames[-1]
|
154 |
+
|
155 |
+
|
156 |
+
def get_ffmpeg_path() -> str:
|
157 |
+
path = shutil.which(_CONFIG_FFMPEG_NAME_OR_PATH)
|
158 |
+
if not path:
|
159 |
+
raise RuntimeError(
|
160 |
+
f"Program '{_CONFIG_FFMPEG_NAME_OR_PATH}' is not found;"
|
161 |
+
" perhaps install ffmpeg using 'apt-get install ffmpeg'.")
|
162 |
+
return path
|
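As a quick sanity check on the frame counts above: each adjacent input pair contributes `2**times_to_interpolate - 1` new frames, so the generator yields `(n - 1) * 2**times_to_interpolate + 1` frames in total, inputs included. A minimal sketch; the model path is a placeholder, while the two photos are the ones bundled with this commit.

```python
# Sketch: recursive midpoint interpolation between the two bundled photos.
from frame_interpolation.eval import interpolator as interpolator_lib
from frame_interpolation.eval import util

input_frames = ['photos/one.png', 'photos/two.png']   # n = 2
times_to_interpolate = 3                              # 7 in-between frames

interpolator = interpolator_lib.Interpolator(
    model_path='<path to the TF2 saved model>',       # placeholder
    align=64,
    block_shape=[1, 1])

frames = list(
    util.interpolate_recursively_from_files(
        input_frames, times_to_interpolate, interpolator))

expected = (len(input_frames) - 1) * 2**times_to_interpolate + 1
assert len(frames) == expected   # 9 frames: 2 inputs + 7 interpolated
```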
losses/losses.py
ADDED
@@ -0,0 +1,266 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Loss functions used to train the FILM interpolation model.
|
16 |
+
|
17 |
+
The losses for training and test loops are configurable via gin. Training can
|
18 |
+
use more than one loss function. The test loop can also evaluate one or more loss
|
19 |
+
functions, each of which can be summarized separately.
|
20 |
+
"""
|
21 |
+
from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple
|
22 |
+
|
23 |
+
from . import vgg19_loss as vgg19
|
24 |
+
import gin.tf
|
25 |
+
import numpy as np
|
26 |
+
import tensorflow as tf
|
27 |
+
|
28 |
+
|
29 |
+
@gin.configurable('vgg', denylist=['example', 'prediction'])
|
30 |
+
def vgg_loss(example: Mapping[str, tf.Tensor],
|
31 |
+
prediction: Mapping[str, tf.Tensor],
|
32 |
+
vgg_model_file: str,
|
33 |
+
weights: Optional[List[float]] = None) -> tf.Tensor:
|
34 |
+
"""Perceptual loss for images in [0,1] color range.
|
35 |
+
|
36 |
+
Args:
|
37 |
+
example: A dictionary with the ground truth image as 'y'.
|
38 |
+
prediction: The prediction dictionary with the image as 'image'.
|
39 |
+
vgg_model_file: The path containing the vgg19 weights in MATLAB format.
|
40 |
+
weights: An optional array of weights for different VGG layers. If None, the
|
41 |
+
default weights are used (see vgg19.vgg_loss documentation).
|
42 |
+
|
43 |
+
Returns:
|
44 |
+
The perceptual loss.
|
45 |
+
"""
|
46 |
+
return vgg19.vgg_loss(prediction['image'], example['y'], vgg_model_file,
|
47 |
+
weights)
|
48 |
+
|
49 |
+
|
50 |
+
@gin.configurable('style', denylist=['example', 'prediction'])
|
51 |
+
def style_loss(example: Mapping[str, tf.Tensor],
|
52 |
+
prediction: Mapping[str, tf.Tensor],
|
53 |
+
vgg_model_file: str,
|
54 |
+
weights: Optional[List[float]] = None) -> tf.Tensor:
|
55 |
+
"""Computes style loss from images in [0..1] color range.
|
56 |
+
|
57 |
+
Args:
|
58 |
+
example: A dictionary with the ground truth image as 'y'.
|
59 |
+
prediction: The prediction dictionary with the image as 'image'.
|
60 |
+
vgg_model_file: The path containing the vgg19 weights in MATLAB format.
|
61 |
+
weights: An optional array of weights for different VGG layers. If None, the
|
62 |
+
default weights are used (see vgg19.vgg_loss documentation).
|
63 |
+
|
64 |
+
Returns:
|
65 |
+
A tf.Tensor of a scalar representing the style loss computed over multiple
|
66 |
+
vgg layer features.
|
67 |
+
"""
|
68 |
+
return vgg19.style_loss(prediction['image'], example['y'], vgg_model_file,
|
69 |
+
weights)
|
70 |
+
|
71 |
+
|
72 |
+
def l1_loss(example: Mapping[str, tf.Tensor],
|
73 |
+
prediction: Mapping[str, tf.Tensor]) -> tf.Tensor:
|
74 |
+
return tf.reduce_mean(tf.abs(prediction['image'] - example['y']))
|
75 |
+
|
76 |
+
|
77 |
+
def l1_warped_loss(example: Mapping[str, tf.Tensor],
|
78 |
+
prediction: Mapping[str, tf.Tensor]) -> tf.Tensor:
|
79 |
+
"""Computes an l1 loss using only warped images.
|
80 |
+
|
81 |
+
Args:
|
82 |
+
example: A dictionary with the ground truth image as 'y'.
|
83 |
+
prediction: The prediction dictionary with the image(s) as 'x0_warped'
|
84 |
+
and/or 'x1_warped'.
|
85 |
+
|
86 |
+
Returns:
|
87 |
+
A tf.Tensor of a scalar representing the linear combination of l1 losses
|
88 |
+
between prediction images and y.
|
89 |
+
"""
|
90 |
+
loss = tf.constant(0.0, dtype=tf.float32)
|
91 |
+
if 'x0_warped' in prediction:
|
92 |
+
loss += tf.reduce_mean(tf.abs(prediction['x0_warped'] - example['y']))
|
93 |
+
if 'x1_warped' in prediction:
|
94 |
+
loss += tf.reduce_mean(tf.abs(prediction['x1_warped'] - example['y']))
|
95 |
+
return loss
|
96 |
+
|
97 |
+
|
98 |
+
def l2_loss(example: Mapping[str, tf.Tensor],
|
99 |
+
prediction: Mapping[str, tf.Tensor]) -> tf.Tensor:
|
100 |
+
return tf.reduce_mean(tf.square(prediction['image'] - example['y']))
|
101 |
+
|
102 |
+
|
103 |
+
def ssim_loss(example: Mapping[str, tf.Tensor],
|
104 |
+
prediction: Mapping[str, tf.Tensor]) -> tf.Tensor:
|
105 |
+
image = prediction['image']
|
106 |
+
y = example['y']
|
107 |
+
return tf.reduce_mean(tf.image.ssim(image, y, max_val=1.0))
|
108 |
+
|
109 |
+
|
110 |
+
def psnr_loss(example: Mapping[str, tf.Tensor],
|
111 |
+
prediction: Mapping[str, tf.Tensor]) -> tf.Tensor:
|
112 |
+
return tf.reduce_mean(
|
113 |
+
tf.image.psnr(prediction['image'], example['y'], max_val=1.0))
|
114 |
+
|
115 |
+
|
116 |
+
def get_loss(loss_name: str) -> Callable[[Any, Any], tf.Tensor]:
|
117 |
+
"""Returns the loss function corresponding to the given name."""
|
118 |
+
if loss_name == 'l1':
|
119 |
+
return l1_loss
|
120 |
+
elif loss_name == 'l2':
|
121 |
+
return l2_loss
|
122 |
+
elif loss_name == 'ssim':
|
123 |
+
return ssim_loss
|
124 |
+
elif loss_name == 'vgg':
|
125 |
+
return vgg_loss
|
126 |
+
elif loss_name == 'style':
|
127 |
+
return style_loss
|
128 |
+
elif loss_name == 'psnr':
|
129 |
+
return psnr_loss
|
130 |
+
elif loss_name == 'l1_warped':
|
131 |
+
return l1_warped_loss
|
132 |
+
else:
|
133 |
+
raise ValueError('Invalid loss function %s' % loss_name)
|
134 |
+
|
135 |
+
|
136 |
+
# pylint: disable=unnecessary-lambda
|
137 |
+
def get_loss_op(loss_name):
|
138 |
+
"""Returns a function for creating a loss calculation op."""
|
139 |
+
loss = get_loss(loss_name)
|
140 |
+
return lambda example, prediction: loss(example, prediction)
|
141 |
+
|
142 |
+
|
143 |
+
def get_weight_op(weight_schedule):
|
144 |
+
"""Returns a function for creating an iteration dependent loss weight op."""
|
145 |
+
return lambda iterations: weight_schedule(iterations)
|
146 |
+
|
147 |
+
|
148 |
+
def create_losses(
|
149 |
+
loss_names: List[str], loss_weight_schedules: List[
|
150 |
+
tf.keras.optimizers.schedules.LearningRateSchedule]
|
151 |
+
) -> Dict[str, Tuple[Callable[[Any, Any], tf.Tensor], Callable[[Any],
|
152 |
+
tf.Tensor]]]:
|
153 |
+
"""Returns a dictionary of functions for creating loss and loss_weight ops.
|
154 |
+
|
155 |
+
As an example, create_losses(['l1', 'l2'], [PiecewiseConstantDecay(),
|
156 |
+
PiecewiseConstantDecay()]) returns a dictionary with two keys, each value
|
157 |
+
being a tuple of ops for loss calculation and loss_weight sampling.
|
158 |
+
|
159 |
+
Args:
|
160 |
+
loss_names: Names of the losses.
|
161 |
+
loss_weight_schedules: Instances of loss weight schedules.
|
162 |
+
|
163 |
+
Returns:
|
164 |
+
A dictionary that contains the loss and weight schedule ops keyed by the
|
165 |
+
names.
|
166 |
+
"""
|
167 |
+
losses = dict()
|
168 |
+
for name, weight_schedule in zip(loss_names, loss_weight_schedules):
|
169 |
+
unique_values = np.unique(weight_schedule.values)
|
170 |
+
if len(unique_values) == 1 and unique_values[0] == 1.0:
|
171 |
+
# Special case 'no weight' for prettier TensorBoard summaries.
|
172 |
+
weighted_name = name
|
173 |
+
else:
|
174 |
+
# Weights are variable/scheduled, a constant "k" is used to
|
175 |
+
# indicate weights are iteration dependent.
|
176 |
+
weighted_name = 'k*' + name
|
177 |
+
losses[weighted_name] = (get_loss_op(name), get_weight_op(weight_schedule))
|
178 |
+
return losses
|
179 |
+
|
180 |
+
|
181 |
+
@gin.configurable
|
182 |
+
def training_losses(
|
183 |
+
loss_names: List[str],
|
184 |
+
loss_weights: Optional[List[float]] = None,
|
185 |
+
loss_weight_schedules: Optional[List[
|
186 |
+
tf.keras.optimizers.schedules.LearningRateSchedule]] = None,
|
187 |
+
loss_weight_parameters: Optional[List[Mapping[str, List[Any]]]] = None
|
188 |
+
) -> Mapping[str, Tuple[Callable[[Any, Any], tf.Tensor], Callable[[Any],
|
189 |
+
tf.Tensor]]]:
|
190 |
+
"""Creates the training loss functions and loss weight schedules."""
|
191 |
+
weight_schedules = []
|
192 |
+
if not loss_weights:
|
193 |
+
for weight_schedule, weight_parameters in zip(loss_weight_schedules,
|
194 |
+
loss_weight_parameters):
|
195 |
+
weight_schedules.append(weight_schedule(**weight_parameters))
|
196 |
+
else:
|
197 |
+
for loss_weight in loss_weights:
|
198 |
+
weight_parameters = {
|
199 |
+
'boundaries': [0],
|
200 |
+
'values': 2 * [
|
201 |
+
loss_weight,
|
202 |
+
]
|
203 |
+
}
|
204 |
+
weight_schedules.append(
|
205 |
+
tf.keras.optimizers.schedules.PiecewiseConstantDecay(
|
206 |
+
**weight_parameters))
|
207 |
+
|
208 |
+
return create_losses(loss_names, weight_schedules)
|
209 |
+
|
210 |
+
|
211 |
+
@gin.configurable
|
212 |
+
def test_losses(
|
213 |
+
loss_names: List[str],
|
214 |
+
loss_weights: Optional[List[float]] = None,
|
215 |
+
loss_weight_schedules: Optional[List[
|
216 |
+
tf.keras.optimizers.schedules.LearningRateSchedule]] = None,
|
217 |
+
loss_weight_parameters: Optional[List[Mapping[str, List[Any]]]] = None
|
218 |
+
) -> Mapping[str, Tuple[Callable[[Any, Any], tf.Tensor], Callable[[Any],
|
219 |
+
tf.Tensor]]]:
|
220 |
+
"""Creates the test loss functions and loss weight schedules."""
|
221 |
+
weight_schedules = []
|
222 |
+
if not loss_weights:
|
223 |
+
for weight_schedule, weight_parameters in zip(loss_weight_schedules,
|
224 |
+
loss_weight_parameters):
|
225 |
+
weight_schedules.append(weight_schedule(**weight_parameters))
|
226 |
+
else:
|
227 |
+
for loss_weight in loss_weights:
|
228 |
+
weight_parameters = {
|
229 |
+
'boundaries': [0],
|
230 |
+
'values': 2 * [
|
231 |
+
loss_weight,
|
232 |
+
]
|
233 |
+
}
|
234 |
+
weight_schedules.append(
|
235 |
+
tf.keras.optimizers.schedules.PiecewiseConstantDecay(
|
236 |
+
**weight_parameters))
|
237 |
+
|
238 |
+
return create_losses(loss_names, weight_schedules)
|
239 |
+
|
240 |
+
|
241 |
+
def aggregate_batch_losses(
|
242 |
+
batch_losses: List[Mapping[str, float]]) -> Mapping[str, float]:
|
243 |
+
"""Averages per batch losses into single dictionary for the whole epoch.
|
244 |
+
|
245 |
+
As an example, if the batch_losses contained per batch losses:
|
246 |
+
batch_losses = [{'l1': 0.2, 'ssim': 0.9}, {'l1': 0.3, 'ssim': 0.8}]
|
247 |
+
The returned dictionary would look like: { 'l1': 0.25, 'ssim': 0.85 }
|
248 |
+
|
249 |
+
Args:
|
250 |
+
batch_losses: A list of dictionary objects, with one entry for each loss.
|
251 |
+
|
252 |
+
Returns:
|
253 |
+
Single dictionary with the losses aggregated.
|
254 |
+
"""
|
255 |
+
transp_losses = {}
|
256 |
+
# Loop through all losses
|
257 |
+
for batch_loss in batch_losses:
|
258 |
+
# Loop through per batch losses of a single type:
|
259 |
+
for loss_name, loss in batch_loss.items():
|
260 |
+
if loss_name not in transp_losses:
|
261 |
+
transp_losses[loss_name] = []
|
262 |
+
transp_losses[loss_name].append(loss)
|
263 |
+
aggregate_losses = {}
|
264 |
+
for loss_name in transp_losses:
|
265 |
+
aggregate_losses[loss_name] = np.mean(transp_losses[loss_name])
|
266 |
+
return aggregate_losses
|
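To make the weighting behaviour above concrete: a constant weight of 1.0 keeps the plain loss name as the dictionary key, while any other schedule gets the `k*` prefix. A minimal, self-contained sketch using only the pure TF losses; the random tensors and the weight value 0.25 are stand-ins, not values from a config in this commit.

```python
# Sketch: build loss/weight ops the way create_losses() keys them, then
# evaluate a weighted sum for one (example, prediction) pair.
import tensorflow as tf

from frame_interpolation.losses import losses

loss_functions = losses.create_losses(
    loss_names=['l1', 'l2'],
    loss_weight_schedules=[
        tf.keras.optimizers.schedules.PiecewiseConstantDecay(
            boundaries=[0], values=[1.0, 1.0]),     # constant 1.0 -> key 'l1'
        tf.keras.optimizers.schedules.PiecewiseConstantDecay(
            boundaries=[0], values=[0.25, 0.25]),   # weighted -> key 'k*l2'
    ])
print(sorted(loss_functions))   # ['k*l2', 'l1']

example = {'y': tf.random.uniform((1, 64, 64, 3))}
prediction = {'image': tf.random.uniform((1, 64, 64, 3))}

total_loss = tf.constant(0.0)
for loss_fn, weight_fn in loss_functions.values():
  total_loss += weight_fn(10) * loss_fn(example, prediction)   # iteration 10
```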
losses/vgg19_loss.py
ADDED
@@ -0,0 +1,362 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Feature loss based on 19 layer VGG network.
|
16 |
+
|
17 |
+
|
18 |
+
The network layers in the feature loss are weighted as described in
|
19 |
+
'Stereo Magnification: Learning View Synthesis using Multiplane Images',
|
20 |
+
Tinghui Zhou, Richard Tucker, John Flynn, Graham Fyffe, Noah Snavely, SIGGRAPH 2018.
|
21 |
+
"""
|
22 |
+
|
23 |
+
from typing import Any, Callable, Dict, Optional, Sequence, Tuple
|
24 |
+
|
25 |
+
import numpy as np
|
26 |
+
import scipy.io as sio
|
27 |
+
import tensorflow.compat.v1 as tf
|
28 |
+
|
29 |
+
|
30 |
+
def _build_net(layer_type: str,
|
31 |
+
input_tensor: tf.Tensor,
|
32 |
+
weight_bias: Optional[Tuple[tf.Tensor, tf.Tensor]] = None,
|
33 |
+
name: Optional[str] = None) -> tf.Tensor:
|
34 |
+
"""Build a layer of the VGG network.
|
35 |
+
|
36 |
+
Args:
|
37 |
+
layer_type: A string, type of this layer.
|
38 |
+
input_tensor: A tensor.
|
39 |
+
weight_bias: A tuple of weight and bias.
|
40 |
+
name: A string, name of this layer.
|
41 |
+
|
42 |
+
Returns:
|
43 |
+
A tf.Tensor, the output of the layer.
|
44 |
+
|
45 |
+
Raises:
|
46 |
+
ValueError: If layer_type is not conv or pool.
|
47 |
+
"""
|
48 |
+
|
49 |
+
if layer_type == 'conv':
|
50 |
+
return tf.nn.relu(
|
51 |
+
tf.nn.conv2d(
|
52 |
+
input_tensor,
|
53 |
+
weight_bias[0],
|
54 |
+
strides=[1, 1, 1, 1],
|
55 |
+
padding='SAME',
|
56 |
+
name=name) + weight_bias[1])
|
57 |
+
elif layer_type == 'pool':
|
58 |
+
return tf.nn.avg_pool(
|
59 |
+
input_tensor, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
|
60 |
+
else:
|
61 |
+
raise ValueError('Unsupported layer %s' % layer_type)
|
62 |
+
|
63 |
+
|
64 |
+
def _get_weight_and_bias(vgg_layers: np.ndarray,
|
65 |
+
index: int) -> Tuple[tf.Tensor, tf.Tensor]:
|
66 |
+
"""Get the weight and bias of a specific layer from the VGG pretrained model.
|
67 |
+
|
68 |
+
Args:
|
69 |
+
vgg_layers: An array, the VGG pretrained model.
|
70 |
+
index: An integer, index of the layer.
|
71 |
+
|
72 |
+
Returns:
|
73 |
+
weights: A tensor.
|
74 |
+
bias: A tensor.
|
75 |
+
"""
|
76 |
+
|
77 |
+
weights = vgg_layers[index][0][0][2][0][0]
|
78 |
+
weights = tf.constant(weights)
|
79 |
+
bias = vgg_layers[index][0][0][2][0][1]
|
80 |
+
bias = tf.constant(np.reshape(bias, (bias.size)))
|
81 |
+
|
82 |
+
return weights, bias
|
83 |
+
|
84 |
+
|
85 |
+
def _build_vgg19(image: tf.Tensor, model_filepath: str) -> Dict[str, tf.Tensor]:
|
86 |
+
"""Builds the VGG network given the model weights.
|
87 |
+
|
88 |
+
The weights are loaded only for the first time this code is invoked.
|
89 |
+
|
90 |
+
Args:
|
91 |
+
image: A tensor, input image.
|
92 |
+
model_filepath: A string, path to the VGG pretrained model.
|
93 |
+
|
94 |
+
Returns:
|
95 |
+
net: A dict mapping a layer name to a tensor.
|
96 |
+
"""
|
97 |
+
|
98 |
+
with tf.variable_scope('vgg', reuse=True):
|
99 |
+
net = {}
|
100 |
+
if not hasattr(_build_vgg19, 'vgg_rawnet'):
|
101 |
+
with tf.io.gfile.GFile(model_filepath, 'rb') as f:
|
102 |
+
_build_vgg19.vgg_rawnet = sio.loadmat(f)
|
103 |
+
vgg_layers = _build_vgg19.vgg_rawnet['layers'][0]
|
104 |
+
imagenet_mean = tf.constant([123.6800, 116.7790, 103.9390],
|
105 |
+
shape=[1, 1, 1, 3])
|
106 |
+
net['input'] = image - imagenet_mean
|
107 |
+
net['conv1_1'] = _build_net(
|
108 |
+
'conv',
|
109 |
+
net['input'],
|
110 |
+
_get_weight_and_bias(vgg_layers, 0),
|
111 |
+
name='vgg_conv1_1')
|
112 |
+
net['conv1_2'] = _build_net(
|
113 |
+
'conv',
|
114 |
+
net['conv1_1'],
|
115 |
+
_get_weight_and_bias(vgg_layers, 2),
|
116 |
+
name='vgg_conv1_2')
|
117 |
+
net['pool1'] = _build_net('pool', net['conv1_2'])
|
118 |
+
net['conv2_1'] = _build_net(
|
119 |
+
'conv',
|
120 |
+
net['pool1'],
|
121 |
+
_get_weight_and_bias(vgg_layers, 5),
|
122 |
+
name='vgg_conv2_1')
|
123 |
+
net['conv2_2'] = _build_net(
|
124 |
+
'conv',
|
125 |
+
net['conv2_1'],
|
126 |
+
_get_weight_and_bias(vgg_layers, 7),
|
127 |
+
name='vgg_conv2_2')
|
128 |
+
net['pool2'] = _build_net('pool', net['conv2_2'])
|
129 |
+
net['conv3_1'] = _build_net(
|
130 |
+
'conv',
|
131 |
+
net['pool2'],
|
132 |
+
_get_weight_and_bias(vgg_layers, 10),
|
133 |
+
name='vgg_conv3_1')
|
134 |
+
net['conv3_2'] = _build_net(
|
135 |
+
'conv',
|
136 |
+
net['conv3_1'],
|
137 |
+
_get_weight_and_bias(vgg_layers, 12),
|
138 |
+
name='vgg_conv3_2')
|
139 |
+
net['conv3_3'] = _build_net(
|
140 |
+
'conv',
|
141 |
+
net['conv3_2'],
|
142 |
+
_get_weight_and_bias(vgg_layers, 14),
|
143 |
+
name='vgg_conv3_3')
|
144 |
+
net['conv3_4'] = _build_net(
|
145 |
+
'conv',
|
146 |
+
net['conv3_3'],
|
147 |
+
_get_weight_and_bias(vgg_layers, 16),
|
148 |
+
name='vgg_conv3_4')
|
149 |
+
net['pool3'] = _build_net('pool', net['conv3_4'])
|
150 |
+
net['conv4_1'] = _build_net(
|
151 |
+
'conv',
|
152 |
+
net['pool3'],
|
153 |
+
_get_weight_and_bias(vgg_layers, 19),
|
154 |
+
name='vgg_conv4_1')
|
155 |
+
net['conv4_2'] = _build_net(
|
156 |
+
'conv',
|
157 |
+
net['conv4_1'],
|
158 |
+
_get_weight_and_bias(vgg_layers, 21),
|
159 |
+
name='vgg_conv4_2')
|
160 |
+
net['conv4_3'] = _build_net(
|
161 |
+
'conv',
|
162 |
+
net['conv4_2'],
|
163 |
+
_get_weight_and_bias(vgg_layers, 23),
|
164 |
+
name='vgg_conv4_3')
|
165 |
+
net['conv4_4'] = _build_net(
|
166 |
+
'conv',
|
167 |
+
net['conv4_3'],
|
168 |
+
_get_weight_and_bias(vgg_layers, 25),
|
169 |
+
name='vgg_conv4_4')
|
170 |
+
net['pool4'] = _build_net('pool', net['conv4_4'])
|
171 |
+
net['conv5_1'] = _build_net(
|
172 |
+
'conv',
|
173 |
+
net['pool4'],
|
174 |
+
_get_weight_and_bias(vgg_layers, 28),
|
175 |
+
name='vgg_conv5_1')
|
176 |
+
net['conv5_2'] = _build_net(
|
177 |
+
'conv',
|
178 |
+
net['conv5_1'],
|
179 |
+
_get_weight_and_bias(vgg_layers, 30),
|
180 |
+
name='vgg_conv5_2')
|
181 |
+
|
182 |
+
return net
|
183 |
+
|
184 |
+
|
185 |
+
def _compute_error(fake: tf.Tensor,
|
186 |
+
real: tf.Tensor,
|
187 |
+
mask: Optional[tf.Tensor] = None) -> tf.Tensor:
|
188 |
+
"""Computes the L1 loss and reweights by the mask."""
|
189 |
+
if mask is None:
|
190 |
+
return tf.reduce_mean(tf.abs(fake - real))
|
191 |
+
else:
|
192 |
+
# Resizes mask to the same size as the input.
|
193 |
+
size = (tf.shape(fake)[1], tf.shape(fake)[2])
|
194 |
+
resized_mask = tf.image.resize(
|
195 |
+
mask, size, method=tf.image.ResizeMethod.BILINEAR)
|
196 |
+
return tf.reduce_mean(tf.abs(fake - real) * resized_mask)
|
197 |
+
|
198 |
+
|
199 |
+
# Normalized VGG loss (from
|
200 |
+
# https://github.com/CQFIO/PhotographicImageSynthesis)
|
201 |
+
def vgg_loss(image: tf.Tensor,
|
202 |
+
reference: tf.Tensor,
|
203 |
+
vgg_model_file: str,
|
204 |
+
weights: Optional[Sequence[float]] = None,
|
205 |
+
mask: Optional[tf.Tensor] = None) -> tf.Tensor:
|
206 |
+
"""Computes the VGG loss for an image pair.
|
207 |
+
|
208 |
+
The VGG loss is the average feature vector difference between the two images.
|
209 |
+
|
210 |
+
The input images must be in [0, 1] range in (B, H, W, 3) RGB format and
|
211 |
+
the recommendation seems to be to have them in gamma space.
|
212 |
+
|
213 |
+
The pretrained weights are publicly available in
|
214 |
+
http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat
|
215 |
+
|
216 |
+
Args:
|
217 |
+
image: A tensor, typically the prediction from a network.
|
218 |
+
reference: A tensor, the image to compare against, i.e. the golden image.
|
219 |
+
vgg_model_file: A string, filename for the VGG 19 network weights in MATLAB
|
220 |
+
format.
|
221 |
+
weights: A list of float, optional weights for the layers. The defaults are
|
222 |
+
from Qifeng Chen and Vladlen Koltun, "Photographic image synthesis with
|
223 |
+
cascaded refinement networks," ICCV 2017.
|
224 |
+
mask: An optional image-shape and single-channel tensor, the mask values are
|
225 |
+
per-pixel weights to be applied on the losses. The mask will be resized to
|
226 |
+
the same spatial resolution with the feature maps before been applied to
|
227 |
+
the losses. When the mask value is zero, pixels near the boundary of the
|
228 |
+
mask can still influence the loss if they fall into the receptive field of
|
229 |
+
the VGG convolutional layers.
|
230 |
+
|
231 |
+
Returns:
|
232 |
+
vgg_loss: The linear combination of losses from five VGG layers.
|
233 |
+
"""
|
234 |
+
|
235 |
+
if not weights:
|
236 |
+
weights = [1.0 / 2.6, 1.0 / 4.8, 1.0 / 3.7, 1.0 / 5.6, 10.0 / 1.5]
|
237 |
+
|
238 |
+
vgg_ref = _build_vgg19(reference * 255.0, vgg_model_file)
|
239 |
+
vgg_img = _build_vgg19(image * 255.0, vgg_model_file)
|
240 |
+
p1 = _compute_error(vgg_ref['conv1_2'], vgg_img['conv1_2'], mask) * weights[0]
|
241 |
+
p2 = _compute_error(vgg_ref['conv2_2'], vgg_img['conv2_2'], mask) * weights[1]
|
242 |
+
p3 = _compute_error(vgg_ref['conv3_2'], vgg_img['conv3_2'], mask) * weights[2]
|
243 |
+
p4 = _compute_error(vgg_ref['conv4_2'], vgg_img['conv4_2'], mask) * weights[3]
|
244 |
+
p5 = _compute_error(vgg_ref['conv5_2'], vgg_img['conv5_2'], mask) * weights[4]
|
245 |
+
|
246 |
+
final_loss = p1 + p2 + p3 + p4 + p5
|
247 |
+
|
248 |
+
# Scale to range [0..1].
|
249 |
+
final_loss /= 255.0
|
250 |
+
|
251 |
+
return final_loss
|
252 |
+
|
253 |
+
|
254 |
+
def _compute_gram_matrix(input_features: tf.Tensor,
|
255 |
+
mask: tf.Tensor) -> tf.Tensor:
|
256 |
+
"""Computes Gram matrix of `input_features`.
|
257 |
+
|
258 |
+
Gram matrix described in https://en.wikipedia.org/wiki/Gramian_matrix.
|
259 |
+
|
260 |
+
Args:
|
261 |
+
input_features: A tf.Tensor of shape (B, H, W, C) representing a feature map
|
262 |
+
obtained by a convolutional layer of a VGG network.
|
263 |
+
mask: A tf.Tensor of shape (B, H, W, 1) representing the per-pixel weights
|
264 |
+
to be applied on the `input_features`. The mask will be resized to the
|
265 |
+
same spatial resolution as the `input_features`. When the mask value is
|
266 |
+
zero, pixels near the boundary of the mask can still influence the loss if
|
267 |
+
they fall into the receptive field of the VGG convolutional layers.
|
268 |
+
|
269 |
+
Returns:
|
270 |
+
A tf.Tensor of shape (B, C, C) representing the gram matrix of the masked
|
271 |
+
`input_features`.
|
272 |
+
"""
|
273 |
+
_, h, w, c = tuple([
|
274 |
+
i if (isinstance(i, int) or i is None) else i.value
|
275 |
+
for i in input_features.shape
|
276 |
+
])
|
277 |
+
if mask is None:
|
278 |
+
reshaped_features = tf.reshape(input_features, (-1, h * w, c))
|
279 |
+
else:
|
280 |
+
# Resize mask to match the shape of `input_features`
|
281 |
+
resized_mask = tf.image.resize(
|
282 |
+
mask, (h, w), method=tf.image.ResizeMethod.BILINEAR)
|
283 |
+
reshaped_features = tf.reshape(input_features * resized_mask,
|
284 |
+
(-1, h * w, c))
|
285 |
+
return tf.matmul(
|
286 |
+
reshaped_features, reshaped_features, transpose_a=True) / float(h * w)
|
287 |
+
|
288 |
+
|
289 |
+
def style_loss(image: tf.Tensor,
|
290 |
+
reference: tf.Tensor,
|
291 |
+
vgg_model_file: str,
|
292 |
+
weights: Optional[Sequence[float]] = None,
|
293 |
+
mask: Optional[tf.Tensor] = None) -> tf.Tensor:
|
294 |
+
"""Computes style loss as used in `A Neural Algorithm of Artistic Style`.
|
295 |
+
|
296 |
+
Based on the work in https://github.com/cysmith/neural-style-tf. Weights are
|
297 |
+
first initialized to the inverse of the number of elements in each VGG layer
|
298 |
+
considered. After 1.5M iterations, they are rescaled to normalize the
|
299 |
+
contribution of the Style loss to be equal to other losses (L1/VGG). This is
|
300 |
+
based on the works of image inpainting (https://arxiv.org/abs/1804.07723)
|
301 |
+
and frame prediction (https://arxiv.org/abs/1811.00684).
|
302 |
+
|
303 |
+
The style loss is the average gram matrix difference between `image` and
|
304 |
+
`reference`. The gram matrix is the inner product of a feature map of shape
|
305 |
+
(B, H*W, C) with itself, resulting in a symmetric gram matrix shaped (B, C, C).
|
306 |
+
|
307 |
+
The input images must be in [0, 1] range in (B, H, W, 3) RGB format and
|
308 |
+
the recommendation seems to be to have them in gamma space.
|
309 |
+
|
310 |
+
The pretrained weights are publicly available in
|
311 |
+
http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat
|
312 |
+
|
313 |
+
Args:
|
314 |
+
image: A tensor, typically the prediction from a network.
|
315 |
+
reference: A tensor, the image to compare against, i.e. the golden image.
|
316 |
+
vgg_model_file: A string, filename for the VGG 19 network weights in MATLAB
|
317 |
+
format.
|
318 |
+
weights: A list of float, optional weights for the layers. The defaults are
|
319 |
+
from Qifeng Chen and Vladlen Koltun, "Photographic image synthesis with
|
320 |
+
cascaded refinement networks," ICCV 2017.
|
321 |
+
mask: An optional image-shape and single-channel tensor, the mask values are
|
322 |
+
per-pixel weights to be applied on the losses. The mask will be resized to
|
323 |
+
the same spatial resolution as the feature maps before being applied to
|
324 |
+
the losses. When the mask value is zero, pixels near the boundary of the
|
325 |
+
mask can still influence the loss if they fall into the receptive field of
|
326 |
+
the VGG convolutional layers.
|
327 |
+
|
328 |
+
Returns:
|
329 |
+
Style loss, a linear combination of gram matrix L2 differences from five
|
330 |
+
VGG layer features.
|
331 |
+
"""
|
332 |
+
|
333 |
+
if not weights:
|
334 |
+
weights = [1.0 / 2.6, 1.0 / 4.8, 1.0 / 3.7, 1.0 / 5.6, 10.0 / 1.5]
|
335 |
+
|
336 |
+
vgg_ref = _build_vgg19(reference * 255.0, vgg_model_file)
|
337 |
+
vgg_img = _build_vgg19(image * 255.0, vgg_model_file)
|
338 |
+
|
339 |
+
p1 = tf.reduce_mean(
|
340 |
+
tf.squared_difference(
|
341 |
+
_compute_gram_matrix(vgg_ref['conv1_2'] / 255.0, mask),
|
342 |
+
_compute_gram_matrix(vgg_img['conv1_2'] / 255.0, mask))) * weights[0]
|
343 |
+
p2 = tf.reduce_mean(
|
344 |
+
tf.squared_difference(
|
345 |
+
_compute_gram_matrix(vgg_ref['conv2_2'] / 255.0, mask),
|
346 |
+
_compute_gram_matrix(vgg_img['conv2_2'] / 255.0, mask))) * weights[1]
|
347 |
+
p3 = tf.reduce_mean(
|
348 |
+
tf.squared_difference(
|
349 |
+
_compute_gram_matrix(vgg_ref['conv3_2'] / 255.0, mask),
|
350 |
+
_compute_gram_matrix(vgg_img['conv3_2'] / 255.0, mask))) * weights[2]
|
351 |
+
p4 = tf.reduce_mean(
|
352 |
+
tf.squared_difference(
|
353 |
+
_compute_gram_matrix(vgg_ref['conv4_2'] / 255.0, mask),
|
354 |
+
_compute_gram_matrix(vgg_img['conv4_2'] / 255.0, mask))) * weights[3]
|
355 |
+
p5 = tf.reduce_mean(
|
356 |
+
tf.squared_difference(
|
357 |
+
_compute_gram_matrix(vgg_ref['conv5_2'] / 255.0, mask),
|
358 |
+
_compute_gram_matrix(vgg_img['conv5_2'] / 255.0, mask))) * weights[4]
|
359 |
+
|
360 |
+
final_loss = p1 + p2 + p3 + p4 + p5
|
361 |
+
|
362 |
+
return final_loss
|
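As a shape check for the style loss above: the gram matrix of a `(B, H, W, C)` feature map is `F^T F / (H*W)` computed on the `(B, H*W, C)` reshape, giving one `(B, C, C)` matrix per image. A minimal sketch on a random tensor; real usage feeds VGG activations, not random values.

```python
# Sketch: the gram-matrix shape math used by _compute_gram_matrix(), applied
# to a random feature map instead of real VGG activations.
import tensorflow as tf

b, h, w, c = 2, 16, 16, 64
features = tf.random.uniform((b, h, w, c))

# (B, H, W, C) -> (B, H*W, C), then G = F^T F / (H*W) with shape (B, C, C).
reshaped = tf.reshape(features, (b, h * w, c))
gram = tf.matmul(reshaped, reshaped, transpose_a=True) / float(h * w)
assert gram.shape == (b, c, c)

# style_loss() then averages squared differences between the gram matrices of
# the prediction and the reference, once per selected VGG layer.
```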
models/.DS_Store
ADDED
Binary file (6.15 kB).
|
|
models/film_net/feature_extractor.py
ADDED
@@ -0,0 +1,193 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""TF2 layer for extracting image features for the film_net interpolator.
|
16 |
+
|
17 |
+
The feature extractor implemented here converts an image pyramid into a pyramid
|
18 |
+
of deep features. The feature pyramid serves a similar purpose as U-Net
|
19 |
+
architecture's encoder, but we use a special cascaded architecture described in
|
20 |
+
Multi-view Image Fusion [1].
|
21 |
+
|
22 |
+
For comprehensiveness, below is a short description of the idea. While the
|
23 |
+
description is a bit involved, the cascaded feature pyramid can be used just
|
24 |
+
like any image feature pyramid.
|
25 |
+
|
26 |
+
Why cascaded architecture?
|
27 |
+
==========================
|
28 |
+
To understand the concept it is worth reviewing a traditional feature pyramid
|
29 |
+
first: *A traditional feature pyramid* as in U-net or in many optical flow
|
30 |
+
networks is built by alternating between convolutions and pooling, starting
|
31 |
+
from the input image.
|
32 |
+
|
33 |
+
It is well known that early features of such architecture correspond to low
|
34 |
+
level concepts such as edges in the image whereas later layers extract
|
35 |
+
semantically higher level concepts such as object classes etc. In other words,
|
36 |
+
the meaning of the filters in each resolution level is different. For problems
|
37 |
+
such as semantic segmentation and many others this is a desirable property.
|
38 |
+
|
39 |
+
However, the asymmetric features preclude sharing weights across resolution
|
40 |
+
levels in the feature extractor itself and in any subsequent neural networks
|
41 |
+
that follow. This can be a downside, since optical flow prediction, for
|
42 |
+
instance is symmetric across resolution levels. The cascaded feature
|
43 |
+
architecture addresses this shortcoming.
|
44 |
+
|
45 |
+
How is it built?
|
46 |
+
================
|
47 |
+
The *cascaded* feature pyramid contains feature vectors that have constant
|
48 |
+
length and meaning on each resolution level, except a few of the finest ones. The
|
49 |
+
advantage of this is that the subsequent optical flow layer can learn
|
50 |
+
synergically from many resolutions. This means that coarse level prediction can
|
51 |
+
benefit from finer resolution training examples, which can be useful with
|
52 |
+
moderately sized datasets to avoid overfitting.
|
53 |
+
|
54 |
+
The cascaded feature pyramid is built by extracting shallower subtree pyramids,
|
55 |
+
each one of them similar to the traditional architecture. Each subtree
|
56 |
+
pyramid S_i is extracted starting from each resolution level:
|
57 |
+
|
58 |
+
image resolution 0 -> S_0
|
59 |
+
image resolution 1 -> S_1
|
60 |
+
image resolution 2 -> S_2
|
61 |
+
...
|
62 |
+
|
63 |
+
If we denote the features at level j of subtree i as S_i_j, the cascaded pyramid
|
64 |
+
is constructed by concatenating features as follows (assuming subtree depth=3):
|
65 |
+
|
66 |
+
lvl
|
67 |
+
feat_0 = concat( S_0_0 )
|
68 |
+
feat_1 = concat( S_1_0 S_0_1 )
|
69 |
+
feat_2 = concat( S_2_0 S_1_1 S_0_2 )
|
70 |
+
feat_3 = concat( S_3_0 S_2_1 S_1_2 )
|
71 |
+
feat_4 = concat( S_4_0 S_3_1 S_2_2 )
|
72 |
+
feat_5 = concat( S_5_0 S_4_1 S_3_2 )
|
73 |
+
....
|
74 |
+
|
75 |
+
In above, all levels except feat_0 and feat_1 have the same number of features
|
76 |
+
with similar semantic meaning. This enables training a single optical flow
|
77 |
+
predictor module shared by levels 2,3,4,5... . For more details and evaluation
|
78 |
+
see [1].
|
79 |
+
|
80 |
+
[1] Multi-view Image Fusion, Trinidad et al. 2019
|
81 |
+
"""
|
82 |
+
|
83 |
+
from typing import List
|
84 |
+
|
85 |
+
from . import options
|
86 |
+
import tensorflow as tf
|
87 |
+
|
88 |
+
|
89 |
+
def _relu(x: tf.Tensor) -> tf.Tensor:
|
90 |
+
return tf.nn.leaky_relu(x, alpha=0.2)
|
91 |
+
|
92 |
+
|
93 |
+
def _conv(filters: int, name: str):
|
94 |
+
return tf.keras.layers.Conv2D(
|
95 |
+
name=name,
|
96 |
+
filters=filters,
|
97 |
+
kernel_size=3,
|
98 |
+
padding='same',
|
99 |
+
activation=_relu)
|
100 |
+
|
101 |
+
|
102 |
+
class SubTreeExtractor(tf.keras.layers.Layer):
|
103 |
+
"""Extracts a hierarchical set of features from an image.
|
104 |
+
|
105 |
+
This is a conventional, hierarchical image feature extractor that extracts
|
106 |
+
[k, k*2, k*4, ...] filters for the image pyramid where k=options.filters.
|
107 |
+
Each level is followed by average pooling.
|
108 |
+
|
109 |
+
Attributes:
|
110 |
+
name: Name for the layer
|
111 |
+
config: Options for the fusion_net frame interpolator
|
112 |
+
"""
|
113 |
+
|
114 |
+
def __init__(self, name: str, config: options.Options):
|
115 |
+
super().__init__(name=name)
|
116 |
+
k = config.filters
|
117 |
+
n = config.sub_levels
|
118 |
+
self.convs = []
|
119 |
+
for i in range(n):
|
120 |
+
self.convs.append(
|
121 |
+
_conv(filters=(k << i), name='cfeat_conv_{}'.format(2 * i)))
|
122 |
+
self.convs.append(
|
123 |
+
_conv(filters=(k << i), name='cfeat_conv_{}'.format(2 * i + 1)))
|
124 |
+
|
125 |
+
def call(self, image: tf.Tensor, n: int) -> List[tf.Tensor]:
|
126 |
+
"""Extracts a pyramid of features from the image.
|
127 |
+
|
128 |
+
Args:
|
129 |
+
image: tf.Tensor with shape BATCH_SIZE x HEIGHT x WIDTH x CHANNELS.
|
130 |
+
n: number of pyramid levels to extract. This can be less than or equal to
|
131 |
+
options.sub_levels given in the __init__.
|
132 |
+
Returns:
|
133 |
+
The pyramid of features, starting from the finest level. Each element
|
134 |
+
contains the output after the last convolution on the corresponding
|
135 |
+
pyramid level.
|
136 |
+
"""
|
137 |
+
head = image
|
138 |
+
pool = tf.keras.layers.AveragePooling2D(
|
139 |
+
pool_size=2, strides=2, padding='valid')
|
140 |
+
pyramid = []
|
141 |
+
for i in range(n):
|
142 |
+
head = self.convs[2*i](head)
|
143 |
+
head = self.convs[2*i+1](head)
|
144 |
+
pyramid.append(head)
|
145 |
+
if i < n-1:
|
146 |
+
head = pool(head)
|
147 |
+
return pyramid
|
148 |
+
|
149 |
+
|
150 |
+
class FeatureExtractor(tf.keras.layers.Layer):
|
151 |
+
"""Extracts features from an image pyramid using a cascaded architecture.
|
152 |
+
|
153 |
+
Attributes:
|
154 |
+
name: Name of the layer
|
155 |
+
config: Options for the fusion_net frame interpolator
|
156 |
+
"""
|
157 |
+
|
158 |
+
def __init__(self, name: str, config: options.Options):
|
159 |
+
super().__init__(name=name)
|
160 |
+
self.extract_sublevels = SubTreeExtractor('sub_extractor', config)
|
161 |
+
self.options = config
|
162 |
+
|
163 |
+
def call(self, image_pyramid: List[tf.Tensor]) -> List[tf.Tensor]:
|
164 |
+
"""Extracts a cascaded feature pyramid.
|
165 |
+
|
166 |
+
Args:
|
167 |
+
image_pyramid: Image pyramid as a list, starting from the finest level.
|
168 |
+
Returns:
|
169 |
+
A pyramid of cascaded features.
|
170 |
+
"""
|
171 |
+
sub_pyramids = []
|
172 |
+
for i in range(len(image_pyramid)):
|
173 |
+
# At each level of the image pyramid, creates a sub_pyramid of features
|
174 |
+
# with 'sub_levels' pyramid levels, re-using the same SubTreeExtractor.
|
175 |
+
# We use the same instance since we want to share the weights.
|
176 |
+
#
|
177 |
+
# However, we cap the depth of the sub_pyramid so we don't create features
|
178 |
+
# that are beyond the coarsest level of the cascaded feature pyramid we
|
179 |
+
# want to generate.
|
180 |
+
capped_sub_levels = min(len(image_pyramid) - i, self.options.sub_levels)
|
181 |
+
sub_pyramids.append(
|
182 |
+
self.extract_sublevels(image_pyramid[i], capped_sub_levels))
|
183 |
+
# Below we generate the cascades of features on each level of the feature
|
184 |
+
# pyramid. Assuming sub_levels=3, The layout of the features will be
|
185 |
+
# as shown in the example on file documentation above.
|
186 |
+
feature_pyramid = []
|
187 |
+
for i in range(len(image_pyramid)):
|
188 |
+
features = sub_pyramids[i][0]
|
189 |
+
for j in range(1, self.options.sub_levels):
|
190 |
+
if j <= i:
|
191 |
+
features = tf.concat([features, sub_pyramids[i - j][j]], axis=-1)
|
192 |
+
feature_pyramid.append(features)
|
193 |
+
return feature_pyramid
|
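To make the concatenation rule above concrete, the number of channels at cascaded level `i` is the sum of `k * 2**j` over the contributing sub-levels `j <= i` (up to `sub_levels`), so every level from `sub_levels - 1` onward has the same width. A small arithmetic sketch; the `filters=64, sub_levels=3` values are illustrative, not taken from a config in this commit.

```python
# Sketch: channel counts of the cascaded feature pyramid produced by
# FeatureExtractor.call(), assuming each sub-level j contributes
# filters * 2**j channels (as in SubTreeExtractor).
def cascaded_channels(num_pyramid_levels, filters, sub_levels):
  channels = []
  for i in range(num_pyramid_levels):
    contributing = [j for j in range(sub_levels) if j <= i]
    channels.append(sum(filters * 2**j for j in contributing))
  return channels

# Constant width from level sub_levels - 1 onward is what lets a single flow
# predictor be shared across the coarse levels.
print(cascaded_channels(num_pyramid_levels=6, filters=64, sub_levels=3))
# [64, 192, 448, 448, 448, 448]
```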
models/film_net/fusion.py
ADDED
@@ -0,0 +1,140 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""The final fusion stage for the film_net frame interpolator.
|
16 |
+
|
17 |
+
The inputs to this module are the warped input images, image features and
|
18 |
+
flow fields, all aligned to the target frame (often midway point between the
|
19 |
+
two original inputs). The output is the final image. FILM has no explicit
|
20 |
+
occlusion handling -- instead using the abovementioned information this module
|
21 |
+
automatically decides how to best blend the inputs together to produce content
|
22 |
+
in areas where the pixels can only be borrowed from one of the inputs.
|
23 |
+
|
24 |
+
Similarly, this module also decides on how much to blend in each input in case
|
25 |
+
of fractional timestep that is not at the halfway point. For example, if the two
|
26 |
+
inputs images are at t=0 and t=1, and we were to synthesize a frame at t=0.1,
|
27 |
+
it often makes most sense to favor the first input. However, this is not
|
28 |
+
always the case -- in particular in occluded pixels.
|
29 |
+
|
30 |
+
The architecture of the Fusion module follows U-net [1] architecture's decoder
|
31 |
+
side, e.g. each pyramid level consists of concatenation with upsampled coarser
|
32 |
+
level output, and two 3x3 convolutions.
|
33 |
+
|
34 |
+
The upsampling is implemented as 'resize convolution', e.g. nearest neighbor
|
35 |
+
upsampling followed by 2x2 convolution as explained in [2]. The classic U-net
|
36 |
+
uses transposed convolutions, which have a tendency to create checkerboard artifacts.
|
37 |
+
|
38 |
+
[1] Ronneberger et al. U-Net: Convolutional Networks for Biomedical Image
|
39 |
+
Segmentation, 2015, https://arxiv.org/pdf/1505.04597.pdf
|
40 |
+
[2] https://distill.pub/2016/deconv-checkerboard/
|
41 |
+
"""
|
42 |
+
|
43 |
+
from typing import List
|
44 |
+
|
45 |
+
from . import options
|
46 |
+
import tensorflow as tf
|
47 |
+
|
48 |
+
|
49 |
+
def _relu(x: tf.Tensor) -> tf.Tensor:
|
50 |
+
return tf.nn.leaky_relu(x, alpha=0.2)
|
51 |
+
|
52 |
+
|
53 |
+
_NUMBER_OF_COLOR_CHANNELS = 3
|
54 |
+
|
55 |
+
|
56 |
+
class Fusion(tf.keras.layers.Layer):
|
57 |
+
"""The decoder."""
|
58 |
+
|
59 |
+
def __init__(self, name: str, config: options.Options):
|
60 |
+
super().__init__(name=name)
|
61 |
+
|
62 |
+
# Each item 'convs[i]' will contain the list of convolutions to be applied
|
63 |
+
# for pyramid level 'i'.
|
64 |
+
self.convs: List[List[tf.keras.layers.Layer]] = []
|
65 |
+
|
66 |
+
# Store the levels, so we can verify right number of levels in call().
|
67 |
+
self.levels = config.fusion_pyramid_levels
|
68 |
+
|
69 |
+
# Create the convolutions. Roughly following the feature extractor, we
|
70 |
+
# double the number of filters when the resolution halves, but only up to
|
71 |
+
# the specialized_levels, after which we use the same number of filters on
|
72 |
+
# all levels.
|
73 |
+
#
|
74 |
+
# We create the convs in fine-to-coarse order, so that the array index
|
75 |
+
# for the convs will correspond to our normal indexing (0=finest level).
|
76 |
+
for i in range(config.fusion_pyramid_levels - 1):
|
77 |
+
m = config.specialized_levels
|
78 |
+
k = config.filters
|
79 |
+
num_filters = (k << i) if i < m else (k << m)
|
80 |
+
|
81 |
+
convs: List[tf.keras.layers.Layer] = []
|
82 |
+
convs.append(
|
83 |
+
tf.keras.layers.Conv2D(
|
84 |
+
filters=num_filters, kernel_size=[2, 2], padding='same'))
|
85 |
+
convs.append(
|
86 |
+
tf.keras.layers.Conv2D(
|
87 |
+
filters=num_filters,
|
88 |
+
kernel_size=[3, 3],
|
89 |
+
padding='same',
|
90 |
+
activation=_relu))
|
91 |
+
convs.append(
|
92 |
+
tf.keras.layers.Conv2D(
|
93 |
+
filters=num_filters,
|
94 |
+
kernel_size=[3, 3],
|
95 |
+
padding='same',
|
96 |
+
activation=_relu))
|
97 |
+
self.convs.append(convs)
|
98 |
+
|
99 |
+
# The final convolution that outputs RGB:
|
100 |
+
self.output_conv = tf.keras.layers.Conv2D(
|
101 |
+
filters=_NUMBER_OF_COLOR_CHANNELS, kernel_size=1)
|
102 |
+
|
103 |
+
def call(self, pyramid: List[tf.Tensor]) -> tf.Tensor:
|
104 |
+
"""Runs the fusion module.
|
105 |
+
|
106 |
+
Args:
|
107 |
+
pyramid: The input feature pyramid as list of tensors. Each tensor being
|
108 |
+
in (B x H x W x C) format, with finest level tensor first.
|
109 |
+
|
110 |
+
Returns:
|
111 |
+
A batch of RGB images.
|
112 |
+
Raises:
|
113 |
+
ValueError, if len(pyramid) != config.fusion_pyramid_levels as provided in
|
114 |
+
the constructor.
|
115 |
+
"""
|
116 |
+
if len(pyramid) != self.levels:
|
117 |
+
raise ValueError(
|
118 |
+
'Fusion called with different number of pyramid levels '
|
119 |
+
f'{len(pyramid)} than it was configured for, {self.levels}.')
|
120 |
+
|
121 |
+
# As a slight difference to a conventional decoder (e.g. U-net), we don't
|
122 |
+
# apply any extra convolutions to the coarsest level, but just pass it
|
123 |
+
# to finer levels for concatenation. This choice has not been thoroughly
|
124 |
+
# evaluated, but is motivated by the educated guess that the fusion part
|
125 |
+
# probably does not need large spatial context, because at this point the
|
126 |
+
# features are spatially aligned by the preceding warp.
|
127 |
+
net = pyramid[-1]
|
128 |
+
|
129 |
+
# Loop starting from the 2nd coarsest level:
|
130 |
+
for i in reversed(range(0, self.levels - 1)):
|
131 |
+
# Resize the tensor from coarser level to match for concatenation.
|
132 |
+
level_size = tf.shape(pyramid[i])[1:3]
|
133 |
+
net = tf.image.resize(net, level_size,
|
134 |
+
tf.image.ResizeMethod.NEAREST_NEIGHBOR)
|
135 |
+
net = self.convs[i][0](net)
|
136 |
+
net = tf.concat([pyramid[i], net], axis=-1)
|
137 |
+
net = self.convs[i][1](net)
|
138 |
+
net = self.convs[i][2](net)
|
139 |
+
net = self.output_conv(net)
|
140 |
+
return net
|
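The constructor above caps the per-level filter count once the loop index reaches specialized_levels. A minimal sketch of that rule, using the defaults filters=16 (k) and specialized_levels=3 (m) from options.py purely for illustration:

k, m = 16, 3                 # config.filters, config.specialized_levels (defaults)
for i in range(5 - 1):       # config.fusion_pyramid_levels - 1, with the default of 5
    num_filters = (k << i) if i < m else (k << m)
    print(i, num_filters)    # 0 -> 16, 1 -> 32, 2 -> 64, 3 -> 128 (capped at k << m)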
models/film_net/interpolator.py
ADDED
@@ -0,0 +1,207 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""The film_net frame interpolator main model code.
|
16 |
+
|
17 |
+
Basics
|
18 |
+
======
|
19 |
+
The film_net is an end-to-end learned neural frame interpolator implemented as
|
20 |
+
a TF2 model. It has the following inputs and outputs:
|
21 |
+
|
22 |
+
Inputs:
|
23 |
+
x0: image A.
|
24 |
+
x1: image B.
|
25 |
+
time: desired sub-frame time.
|
26 |
+
|
27 |
+
Outputs:
|
28 |
+
image: the predicted in-between image at the chosen time in range [0, 1].
|
29 |
+
|
30 |
+
Additional outputs include forward and backward warped image pyramids, flow
|
31 |
+
pyramids, etc., that can be visualized for debugging and analysis.
|
32 |
+
|
33 |
+
Note that many training sets only contain triplets with ground truth at
|
34 |
+
time=0.5. If a model has been trained with such training set, it will only work
|
35 |
+
well for synthesizing frames at time=0.5. Such models can only generate more
|
36 |
+
in-between frames using recursion.
|
37 |
+
|
38 |
+
Architecture
|
39 |
+
============
|
40 |
+
The inference consists of three main stages: 1) feature extraction 2) warping
|
41 |
+
3) fusion. At a high level, the architecture has similarities to Context-aware
|
42 |
+
Synthesis for Video Frame Interpolation [1], but the exact architecture is
|
43 |
+
closer to Multi-view Image Fusion [2] with some modifications for the frame
|
44 |
+
interpolation use-case.
|
45 |
+
|
46 |
+
The feature extraction stage employs the cascaded multi-scale architecture described
|
47 |
+
in [2]. The advantage of this architecture is that coarse level flow prediction
|
48 |
+
can be learned from finer resolution image samples. This is especially useful
|
49 |
+
to avoid overfitting with moderately sized datasets.
|
50 |
+
|
51 |
+
The warping stage uses a residual flow prediction idea that is similar to
|
52 |
+
PWC-Net [3], Multi-view Image Fusion [2] and many others.
|
53 |
+
|
54 |
+
The fusion stage is similar to U-Net's decoder where the skip connections are
|
55 |
+
connected to warped image and feature pyramids. This is described in [2].
|
56 |
+
|
57 |
+
Implementation Conventions
|
58 |
+
==========================
|
59 |
+
Pyramids
|
60 |
+
--------
|
61 |
+
Throughout the model, all image and feature pyramids are stored as Python lists
|
62 |
+
with finest level first followed by downscaled versions obtained by successively
|
63 |
+
halving the resolution. The depths of all pyramids are determined by
|
64 |
+
options.pyramid_levels. The only exception to this is internal to the feature
|
65 |
+
extractor, where smaller feature pyramids are temporarily constructed with depth
|
66 |
+
options.sub_levels.
|
67 |
+
|
68 |
+
Color ranges & gamma
|
69 |
+
--------------------
|
70 |
+
The model code makes no assumptions on whether the images are in gamma or
|
71 |
+
linearized space, or what the range of RGB color values is. So a model can be
|
72 |
+
trained with different choices. This does not mean that all the choices lead to
|
73 |
+
similar results. In practice the model has been proven to work well with RGB
|
74 |
+
scale = [0,1] with gamma-space images (i.e. not linearized).
|
75 |
+
|
76 |
+
[1] Context-aware Synthesis for Video Frame Interpolation, Niklaus and Liu, 2018
|
77 |
+
[2] Multi-view Image Fusion, Trinidad et al, 2019
|
78 |
+
[3] PWC-Net: CNNs for Optical Flow Using Pyramid, Warping, and Cost Volume
|
79 |
+
"""
|
80 |
+
|
81 |
+
from . import feature_extractor
|
82 |
+
from . import fusion
|
83 |
+
from . import options
|
84 |
+
from . import pyramid_flow_estimator
|
85 |
+
from . import util
|
86 |
+
import tensorflow as tf
|
87 |
+
|
88 |
+
|
89 |
+
def create_model(x0: tf.Tensor, x1: tf.Tensor, time: tf.Tensor,
|
90 |
+
config: options.Options) -> tf.keras.Model:
|
91 |
+
"""Creates a frame interpolator model.
|
92 |
+
|
93 |
+
The frame interpolator is used to warp the two images to the in-between frame
|
94 |
+
at given time. Note that training data is often restricted such that
|
95 |
+
supervision only exists at 'time'=0.5. If trained with such data, the model
|
96 |
+
will overfit to predicting images that are halfway between the two inputs and
|
97 |
+
will not be as accurate elsewhere.
|
98 |
+
|
99 |
+
Args:
|
100 |
+
x0: first input image as BxHxWxC tensor.
|
101 |
+
x1: second input image as BxHxWxC tensor.
|
102 |
+
time: ignored by film_net. We always infer a frame at t = 0.5.
|
103 |
+
config: FilmNetOptions object.
|
104 |
+
|
105 |
+
Returns:
|
106 |
+
A tf.Model that takes 'x0', 'x1', and 'time' as input and returns a
|
107 |
+
dictionary with the interpolated result in 'image'. For additional
|
108 |
+
diagnostics or supervision, the following intermediate results are
|
109 |
+
also stored in the dictionary:
|
110 |
+
'x0_warped': an intermediate result obtained by warping from x0
|
111 |
+
'x1_warped': an intermediate result obtained by warping from x1
|
112 |
+
'forward_residual_flow_pyramid': pyramid with forward residual flows
|
113 |
+
'backward_residual_flow_pyramid': pyramid with backward residual flows
|
114 |
+
'forward_flow_pyramid': pyramid with forward flows
|
115 |
+
'backward_flow_pyramid': pyramid with backward flows
|
116 |
+
|
117 |
+
Raises:
|
118 |
+
ValueError, if config.pyramid_levels < config.fusion_pyramid_levels.
|
119 |
+
"""
|
120 |
+
if config.pyramid_levels < config.fusion_pyramid_levels:
|
121 |
+
raise ValueError('config.pyramid_levels must be greater than or equal to '
|
122 |
+
'config.fusion_pyramid_levels.')
|
123 |
+
|
124 |
+
x0_decoded = x0
|
125 |
+
x1_decoded = x1
|
126 |
+
|
127 |
+
# shuffle images
|
128 |
+
image_pyramids = [
|
129 |
+
util.build_image_pyramid(x0_decoded, config),
|
130 |
+
util.build_image_pyramid(x1_decoded, config)
|
131 |
+
]
|
132 |
+
|
133 |
+
# Siamese feature pyramids:
|
134 |
+
extract = feature_extractor.FeatureExtractor('feat_net', config)
|
135 |
+
feature_pyramids = [extract(image_pyramids[0]), extract(image_pyramids[1])]
|
136 |
+
|
137 |
+
predict_flow = pyramid_flow_estimator.PyramidFlowEstimator(
|
138 |
+
'predict_flow', config)
|
139 |
+
|
140 |
+
# Predict forward flow.
|
141 |
+
forward_residual_flow_pyramid = predict_flow(feature_pyramids[0],
|
142 |
+
feature_pyramids[1])
|
143 |
+
# Predict backward flow.
|
144 |
+
backward_residual_flow_pyramid = predict_flow(feature_pyramids[1],
|
145 |
+
feature_pyramids[0])
|
146 |
+
|
147 |
+
# Concatenate features and images:
|
148 |
+
|
149 |
+
# Note that we keep up to 'fusion_pyramid_levels' levels as only those
|
150 |
+
# are used by the fusion module.
|
151 |
+
fusion_pyramid_levels = config.fusion_pyramid_levels
|
152 |
+
|
153 |
+
forward_flow_pyramid = util.flow_pyramid_synthesis(
|
154 |
+
forward_residual_flow_pyramid)[:fusion_pyramid_levels]
|
155 |
+
backward_flow_pyramid = util.flow_pyramid_synthesis(
|
156 |
+
backward_residual_flow_pyramid)[:fusion_pyramid_levels]
|
157 |
+
|
158 |
+
# We multiply the flows with t and 1-t to warp to the desired fractional time.
|
159 |
+
#
|
160 |
+
# Note: In film_net we fix time to be 0.5, and recursively invoke the interpo-
|
161 |
+
# lator for multi-frame interpolation. Below, we create a constant tensor of
|
162 |
+
# shape [B]. We use the `time` tensor to infer the batch size.
|
163 |
+
mid_time = tf.keras.layers.Lambda(lambda x: tf.ones_like(x) * 0.5)(time)
|
164 |
+
backward_flow = util.multiply_pyramid(backward_flow_pyramid, mid_time[:, 0])
|
165 |
+
forward_flow = util.multiply_pyramid(forward_flow_pyramid, 1 - mid_time[:, 0])
|
166 |
+
|
167 |
+
pyramids_to_warp = [
|
168 |
+
util.concatenate_pyramids(image_pyramids[0][:fusion_pyramid_levels],
|
169 |
+
feature_pyramids[0][:fusion_pyramid_levels]),
|
170 |
+
util.concatenate_pyramids(image_pyramids[1][:fusion_pyramid_levels],
|
171 |
+
feature_pyramids[1][:fusion_pyramid_levels])
|
172 |
+
]
|
173 |
+
|
174 |
+
# Warp features and images using the flow. Note that we use backward warping
|
175 |
+
# and backward flow is used to read from image 0 and forward flow from
|
176 |
+
# image 1.
|
177 |
+
forward_warped_pyramid = util.pyramid_warp(pyramids_to_warp[0], backward_flow)
|
178 |
+
backward_warped_pyramid = util.pyramid_warp(pyramids_to_warp[1], forward_flow)
|
179 |
+
|
180 |
+
aligned_pyramid = util.concatenate_pyramids(forward_warped_pyramid,
|
181 |
+
backward_warped_pyramid)
|
182 |
+
aligned_pyramid = util.concatenate_pyramids(aligned_pyramid, backward_flow)
|
183 |
+
aligned_pyramid = util.concatenate_pyramids(aligned_pyramid, forward_flow)
|
184 |
+
|
185 |
+
fuse = fusion.Fusion('fusion', config)
|
186 |
+
prediction = fuse(aligned_pyramid)
|
187 |
+
|
188 |
+
output_color = prediction[..., :3]
|
189 |
+
outputs = {'image': output_color}
|
190 |
+
|
191 |
+
if config.use_aux_outputs:
|
192 |
+
outputs.update({
|
193 |
+
'x0_warped': forward_warped_pyramid[0][..., 0:3],
|
194 |
+
'x1_warped': backward_warped_pyramid[0][..., 0:3],
|
195 |
+
'forward_residual_flow_pyramid': forward_residual_flow_pyramid,
|
196 |
+
'backward_residual_flow_pyramid': backward_residual_flow_pyramid,
|
197 |
+
'forward_flow_pyramid': forward_flow_pyramid,
|
198 |
+
'backward_flow_pyramid': backward_flow_pyramid,
|
199 |
+
})
|
200 |
+
|
201 |
+
model = tf.keras.Model(
|
202 |
+
inputs={
|
203 |
+
'x0': x0,
|
204 |
+
'x1': x1,
|
205 |
+
'time': time
|
206 |
+
}, outputs=outputs)
|
207 |
+
return model
|
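As the docstring notes, a model supervised only at time=0.5 is applied recursively to produce additional in-between frames. A minimal sketch of that recursion, where interpolate_fn(a, b) stands in for one t=0.5 model invocation (the helper name is hypothetical, not part of this repo):

def interpolate_recursively(frame_a, frame_b, times, interpolate_fn):
    """Returns the 2**times - 1 frames strictly between frame_a and frame_b."""
    if times == 0:
        return []
    mid = interpolate_fn(frame_a, frame_b)  # one t=0.5 invocation
    return (interpolate_recursively(frame_a, mid, times - 1, interpolate_fn)
            + [mid]
            + interpolate_recursively(mid, frame_b, times - 1, interpolate_fn))

Counting both inputs as well, this yields the 2**times + 1 output frames mentioned in predict.py further below.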
models/film_net/options.py
ADDED
@@ -0,0 +1,81 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Options for the film_net video frame interpolator."""
|
16 |
+
|
17 |
+
import gin.tf
|
18 |
+
|
19 |
+
|
20 |
+
@gin.configurable('film_net')
|
21 |
+
class Options(object):
|
22 |
+
"""Options for the film_net video frame interpolator.
|
23 |
+
|
24 |
+
To further understand these options, see the paper here:
|
25 |
+
https://augmentedperception.github.io/pixelfusion/.
|
26 |
+
|
27 |
+
The default values are suitable for up to 64 pixel motions. For larger motions
|
28 |
+
the number of flow convolutions and/or pyramid levels can be increased, but
|
29 |
+
usually at the cost of accuracy in solving the smaller motions.
|
30 |
+
|
31 |
+
The maximum motion in pixels that the system can resolve is equivalent to
|
32 |
+
2^(pyramid_levels-1) * flow_convs[-1]. I.e. the downsampling factor times
|
33 |
+
the receptive field radius on the coarsest pyramid level. This, of course,
|
34 |
+
assumes that the training data contains such motions.
|
35 |
+
|
36 |
+
Note that to avoid a run-time error, the input image width and height have to
|
37 |
+
be divisible by 2^(pyramid_levels-1).
|
38 |
+
|
39 |
+
Attributes:
|
40 |
+
pyramid_levels: How many pyramid levels to use for the feature pyramid and
|
41 |
+
the flow prediction.
|
42 |
+
fusion_pyramid_levels: How many pyramid levels to use for the fusion module;
|
43 |
+
this must be less than or equal to 'pyramid_levels'.
|
44 |
+
specialized_levels: How many fine levels of the pyramid shouldn't share the
|
45 |
+
weights. If specialized_levels = 3, it means that two finest levels are
|
46 |
+
independently learned, whereas the third will be learned together with the
|
47 |
+
rest of the pyramid. Valid range [1, pyramid_levels].
|
48 |
+
flow_convs: Convolutions per residual flow predictor. This array should have
|
49 |
+
specialized_levels+1 items on it, the last item representing the number of
|
50 |
+
convs used by any pyramid level that uses shared weights.
|
51 |
+
flow_filters: Base number of filters in residual flow predictors. This array
|
52 |
+
should have specialized_levels+1 items on it, the last item representing
|
53 |
+
the number of filters used by any pyramid level that uses shared weights.
|
54 |
+
sub_levels: The depth of the cascaded feature tree each pyramid level
|
55 |
+
concatenates together to compute the flow. This must be within range [1,
|
56 |
+
specialized_levels+1]. It is recommended to set this to specialized_levels
|
57 |
+
+ 1.
|
58 |
+
filters: Base number of features to extract. On each pyramid level the
|
59 |
+
number doubles. This is used by both feature extraction and fusion stages.
|
60 |
+
use_aux_outputs: Set to True to include auxiliary outputs along with the
|
61 |
+
predicted image.
|
62 |
+
"""
|
63 |
+
|
64 |
+
def __init__(self,
|
65 |
+
pyramid_levels=5,
|
66 |
+
fusion_pyramid_levels=5,
|
67 |
+
specialized_levels=3,
|
68 |
+
flow_convs=None,
|
69 |
+
flow_filters=None,
|
70 |
+
sub_levels=4,
|
71 |
+
filters=16,
|
72 |
+
use_aux_outputs=True):
|
73 |
+
self.pyramid_levels = pyramid_levels
|
74 |
+
self.fusion_pyramid_levels = fusion_pyramid_levels
|
75 |
+
self.specialized_levels = specialized_levels
|
76 |
+
self.flow_convs = flow_convs or [4, 4, 4, 4]
|
77 |
+
self.flow_filters = flow_filters or [64, 128, 256, 256]
|
78 |
+
self.sub_levels = sub_levels
|
79 |
+
self.filters = filters
|
80 |
+
self.use_aux_outputs = use_aux_outputs
|
81 |
+
|
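Applying the two formulas from the docstring above to these defaults reproduces the numbers it quotes; a small sketch (plain arithmetic, no model code):

pyramid_levels = 5
flow_convs = [4, 4, 4, 4]
max_motion_px = 2 ** (pyramid_levels - 1) * flow_convs[-1]  # 16 * 4 = 64 pixels
alignment = 2 ** (pyramid_levels - 1)   # input H and W must be divisible by 16
print(max_motion_px, alignment)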
models/film_net/pyramid_flow_estimator.py
ADDED
@@ -0,0 +1,163 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""TF2 layer for estimating optical flow by a residual flow pyramid.
|
16 |
+
|
17 |
+
This approach of estimating optical flow between two images can be traced back
|
18 |
+
to [1], but is also used by later neural optical flow computation methods such
|
19 |
+
as SpyNet [2] and PWC-Net [3].
|
20 |
+
|
21 |
+
The basic idea is that the optical flow is first estimated in a coarse
|
22 |
+
resolution, then the flow is upsampled to warp the higher resolution image and
|
23 |
+
then a residual correction is computed and added to the estimated flow. This
|
24 |
+
process is repeated over the pyramid in coarse-to-fine order to successively
|
25 |
+
increase the resolution of both optical flow and the warped image.
|
26 |
+
|
27 |
+
Here, the optical flow predictor is used as an internal component of the
|
28 |
+
film_net frame interpolator, to warp the two input images into the in-between
|
29 |
+
target frame.
|
30 |
+
|
31 |
+
[1] F. Glazer, Hierarchical motion detection. PhD thesis, 1987.
|
32 |
+
[2] A. Ranjan and M. J. Black, Optical Flow Estimation using a Spatial Pyramid
|
33 |
+
Network. 2016
|
34 |
+
[3] D. Sun X. Yang, M-Y. Liu and J. Kautz, PWC-Net: CNNs for Optical Flow Using
|
35 |
+
Pyramid, Warping, and Cost Volume, 2017
|
36 |
+
"""
|
37 |
+
|
38 |
+
from typing import List
|
39 |
+
|
40 |
+
from . import options
|
41 |
+
from . import util
|
42 |
+
import tensorflow as tf
|
43 |
+
|
44 |
+
|
45 |
+
def _relu(x: tf.Tensor) -> tf.Tensor:
|
46 |
+
return tf.nn.leaky_relu(x, alpha=0.2)
|
47 |
+
|
48 |
+
|
49 |
+
class FlowEstimator(tf.keras.layers.Layer):
|
50 |
+
"""Small-receptive field predictor for computing the flow between two images.
|
51 |
+
|
52 |
+
This is used to compute the residual flow fields in PyramidFlowEstimator.
|
53 |
+
|
54 |
+
Note that while the number of 3x3 convolutions & filters to apply is
|
55 |
+
configurable, two extra 1x1 convolutions are appended to extract the flow in
|
56 |
+
the end.
|
57 |
+
|
58 |
+
Attributes:
|
59 |
+
name: The name of the layer
|
60 |
+
num_convs: Number of 3x3 convolutions to apply
|
61 |
+
num_filters: Number of filters in each 3x3 convolution
|
62 |
+
"""
|
63 |
+
|
64 |
+
def __init__(self, name: str, num_convs: int, num_filters: int):
|
65 |
+
super(FlowEstimator, self).__init__(name=name)
|
66 |
+
def conv(filters, size, name, activation=_relu):
|
67 |
+
return tf.keras.layers.Conv2D(
|
68 |
+
name=name,
|
69 |
+
filters=filters,
|
70 |
+
kernel_size=size,
|
71 |
+
padding='same',
|
72 |
+
activation=activation)
|
73 |
+
|
74 |
+
self._convs = []
|
75 |
+
for i in range(num_convs):
|
76 |
+
self._convs.append(conv(filters=num_filters, size=3, name=f'conv_{i}'))
|
77 |
+
self._convs.append(conv(filters=num_filters/2, size=1, name=f'conv_{i+1}'))
|
78 |
+
# For the final convolution, we want no activation at all to predict the
|
79 |
+
# optical flow vector values. We have done extensive testing on explicitly
|
80 |
+
# bounding these values using sigmoid, but it turned out that having no
|
81 |
+
# activation gives better results.
|
82 |
+
self._convs.append(
|
83 |
+
conv(filters=2, size=1, name=f'conv_{i+2}', activation=None))
|
84 |
+
|
85 |
+
def call(self, features_a: tf.Tensor, features_b: tf.Tensor) -> tf.Tensor:
|
86 |
+
"""Estimates optical flow between two images.
|
87 |
+
|
88 |
+
Args:
|
89 |
+
features_a: per pixel feature vectors for image A (B x H x W x C)
|
90 |
+
features_b: per pixel feature vectors for image B (B x H x W x C)
|
91 |
+
|
92 |
+
Returns:
|
93 |
+
A tensor with optical flow from A to B
|
94 |
+
"""
|
95 |
+
net = tf.concat([features_a, features_b], axis=-1)
|
96 |
+
for conv in self._convs:
|
97 |
+
net = conv(net)
|
98 |
+
return net
|
99 |
+
|
100 |
+
|
101 |
+
class PyramidFlowEstimator(tf.keras.layers.Layer):
|
102 |
+
"""Predicts optical flow by coarse-to-fine refinement.
|
103 |
+
|
104 |
+
Attributes:
|
105 |
+
name: The name of the layer
|
106 |
+
config: Options for the film_net frame interpolator
|
107 |
+
"""
|
108 |
+
|
109 |
+
def __init__(self, name: str, config: options.Options):
|
110 |
+
super(PyramidFlowEstimator, self).__init__(name=name)
|
111 |
+
self._predictors = []
|
112 |
+
for i in range(config.specialized_levels):
|
113 |
+
self._predictors.append(
|
114 |
+
FlowEstimator(
|
115 |
+
name=f'flow_predictor_{i}',
|
116 |
+
num_convs=config.flow_convs[i],
|
117 |
+
num_filters=config.flow_filters[i]))
|
118 |
+
shared_predictor = FlowEstimator(
|
119 |
+
name='flow_predictor_shared',
|
120 |
+
num_convs=config.flow_convs[-1],
|
121 |
+
num_filters=config.flow_filters[-1])
|
122 |
+
for i in range(config.specialized_levels, config.pyramid_levels):
|
123 |
+
self._predictors.append(shared_predictor)
|
124 |
+
|
125 |
+
def call(self, feature_pyramid_a: List[tf.Tensor],
|
126 |
+
feature_pyramid_b: List[tf.Tensor]) -> List[tf.Tensor]:
|
127 |
+
"""Estimates residual flow pyramids between two image pyramids.
|
128 |
+
|
129 |
+
Each image pyramid is represented as a list of tensors in fine-to-coarse
|
130 |
+
order. Each individual image is represented as a tensor where each pixel is
|
131 |
+
a vector of image features.
|
132 |
+
|
133 |
+
util.flow_pyramid_synthesis can be used to convert the residual flow
|
134 |
+
pyramid returned by this method into a flow pyramid, where each level
|
135 |
+
encodes the flow instead of a residual correction.
|
136 |
+
|
137 |
+
Args:
|
138 |
+
feature_pyramid_a: image pyramid as a list in fine-to-coarse order
|
139 |
+
feature_pyramid_b: image pyramid as a list in fine-to-coarse order
|
140 |
+
|
141 |
+
Returns:
|
142 |
+
List of flow tensors, in fine-to-coarse order, each level encoding the
|
143 |
+
difference against the bilinearly upsampled version from the coarser
|
144 |
+
level. The coarsest flow tensor, i.e. the last element in the array, is the
|
145 |
+
'DC-term', i.e. not a residual (alternatively, you can think of it as a
|
146 |
+
residual against zero).
|
147 |
+
"""
|
148 |
+
levels = len(feature_pyramid_a)
|
149 |
+
v = self._predictors[-1](feature_pyramid_a[-1], feature_pyramid_b[-1])
|
150 |
+
residuals = [v]
|
151 |
+
for i in reversed(range(0, levels-1)):
|
152 |
+
# Upsamples the flow to match the current pyramid level. Also, scales the
|
153 |
+
# magnitude by two to reflect the new size.
|
154 |
+
level_size = tf.shape(feature_pyramid_a[i])[1:3]
|
155 |
+
v = tf.image.resize(images=2*v, size=level_size)
|
156 |
+
# Warp feature_pyramid_b[i] image based on the current flow estimate.
|
157 |
+
warped = util.warp(feature_pyramid_b[i], v)
|
158 |
+
# Estimate the residual flow between pyramid_a[i] and warped image:
|
159 |
+
v_residual = self._predictors[i](feature_pyramid_a[i], warped)
|
160 |
+
residuals.append(v_residual)
|
161 |
+
v = v_residual + v
|
162 |
+
# Use reversed() to return in the 'standard' finest-first-order:
|
163 |
+
return list(reversed(residuals))
|
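Under the default Options (specialized_levels=3, pyramid_levels=5), the constructor above gives each of the three finest levels its own FlowEstimator and lets the remaining levels reuse the shared one. A small sketch of that assignment (names match the layer names used above):

specialized_levels, pyramid_levels = 3, 5
assignment = [
    f'flow_predictor_{i}' if i < specialized_levels else 'flow_predictor_shared'
    for i in range(pyramid_levels)
]
print(assignment)
# ['flow_predictor_0', 'flow_predictor_1', 'flow_predictor_2',
#  'flow_predictor_shared', 'flow_predictor_shared']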
models/film_net/util.py
ADDED
@@ -0,0 +1,143 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Various utilities used in the film_net frame interpolator model."""
|
16 |
+
from typing import List
|
17 |
+
|
18 |
+
from .options import Options
|
19 |
+
import tensorflow as tf
|
20 |
+
import tensorflow_addons.image as tfa_image
|
21 |
+
|
22 |
+
|
23 |
+
def build_image_pyramid(image: tf.Tensor,
|
24 |
+
options: Options) -> List[tf.Tensor]:
|
25 |
+
"""Builds an image pyramid from a given image.
|
26 |
+
|
27 |
+
The original image is included in the pyramid and the rest are generated by
|
28 |
+
successively halving the resolution.
|
29 |
+
|
30 |
+
Args:
|
31 |
+
image: the input image.
|
32 |
+
options: film_net options object
|
33 |
+
|
34 |
+
Returns:
|
35 |
+
A list of images starting from the finest with options.pyramid_levels items
|
36 |
+
"""
|
37 |
+
levels = options.pyramid_levels
|
38 |
+
pyramid = []
|
39 |
+
pool = tf.keras.layers.AveragePooling2D(
|
40 |
+
pool_size=2, strides=2, padding='valid')
|
41 |
+
for i in range(0, levels):
|
42 |
+
pyramid.append(image)
|
43 |
+
if i < levels-1:
|
44 |
+
image = pool(image)
|
45 |
+
return pyramid
|
46 |
+
|
47 |
+
|
48 |
+
def warp(image: tf.Tensor, flow: tf.Tensor) -> tf.Tensor:
|
49 |
+
"""Backward warps the image using the given flow.
|
50 |
+
|
51 |
+
Specifically, the output pixel in batch b, at position x, y will be computed
|
52 |
+
as follows:
|
53 |
+
(flowed_y, flowed_x) = (y+flow[b, y, x, 1], x+flow[b, y, x, 0])
|
54 |
+
output[b, y, x] = bilinear_lookup(image, b, flowed_y, flowed_x)
|
55 |
+
|
56 |
+
Note that the flow vectors are expected as [x, y], e.g. x in position 0 and
|
57 |
+
y in position 1.
|
58 |
+
|
59 |
+
Args:
|
60 |
+
image: An image with shape BxHxWxC.
|
61 |
+
flow: A flow with shape BxHxWx2, with the two channels denoting the relative
|
62 |
+
offset in order: (dx, dy).
|
63 |
+
Returns:
|
64 |
+
A warped image.
|
65 |
+
"""
|
66 |
+
# tfa_image.dense_image_warp expects unconventional negated optical flow, so
|
67 |
+
# negate the flow here. Also revert x and y for compatibility with older saved
|
68 |
+
# models trained with custom warp op that stored (x, y) instead of (y, x) flow
|
69 |
+
# vectors.
|
70 |
+
flow = -flow[..., ::-1]
|
71 |
+
|
72 |
+
# Note: we have to wrap tfa_image.dense_image_warp into a Keras Lambda,
|
73 |
+
# because it is not compatible with Keras symbolic tensors and we want to use
|
74 |
+
# this code as part of a Keras model. Wrapping it into a lambda has the
|
75 |
+
# consequence that tfa_image.dense_image_warp is only called once the tensors
|
76 |
+
# are concrete, i.e. actually contain data. The inner lambda is a workaround
|
77 |
+
# for passing two parameters, i.e. you would really want to write:
|
78 |
+
# tf.keras.layers.Lambda(tfa_image.dense_image_warp)(image, flow), but this is
|
79 |
+
# not supported by the Keras Lambda.
|
80 |
+
warped = tf.keras.layers.Lambda(
|
81 |
+
lambda x: tfa_image.dense_image_warp(*x))((image, flow))
|
82 |
+
return tf.reshape(warped, shape=tf.shape(image))
|
83 |
+
|
84 |
+
|
85 |
+
def multiply_pyramid(pyramid: List[tf.Tensor],
|
86 |
+
scalar: tf.Tensor) -> List[tf.Tensor]:
|
87 |
+
"""Multiplies all image batches in the pyramid by a batch of scalars.
|
88 |
+
|
89 |
+
Args:
|
90 |
+
pyramid: Pyramid of image batches.
|
91 |
+
scalar: Batch of scalars.
|
92 |
+
|
93 |
+
Returns:
|
94 |
+
An image pyramid with all images multiplied by the scalar.
|
95 |
+
"""
|
96 |
+
# To multiply each image with its corresponding scalar, we first transpose
|
97 |
+
# the batch of images from BxHxWxC-format to CxHxWxB. This can then be
|
98 |
+
# multiplied with a batch of scalars, then we transpose back to the standard
|
99 |
+
# BxHxWxC form.
|
100 |
+
return [
|
101 |
+
tf.transpose(tf.transpose(image, [3, 1, 2, 0]) * scalar, [3, 1, 2, 0])
|
102 |
+
for image in pyramid
|
103 |
+
]
|
104 |
+
|
105 |
+
|
106 |
+
def flow_pyramid_synthesis(
|
107 |
+
residual_pyramid: List[tf.Tensor]) -> List[tf.Tensor]:
|
108 |
+
"""Converts a residual flow pyramid into a flow pyramid."""
|
109 |
+
flow = residual_pyramid[-1]
|
110 |
+
flow_pyramid = [flow]
|
111 |
+
for residual_flow in reversed(residual_pyramid[:-1]):
|
112 |
+
level_size = tf.shape(residual_flow)[1:3]
|
113 |
+
flow = tf.image.resize(images=2*flow, size=level_size)
|
114 |
+
flow = residual_flow + flow
|
115 |
+
flow_pyramid.append(flow)
|
116 |
+
# Use reversed() to return in the 'standard' finest-first-order:
|
117 |
+
return list(reversed(flow_pyramid))
|
118 |
+
|
119 |
+
|
120 |
+
def pyramid_warp(feature_pyramid: List[tf.Tensor],
|
121 |
+
flow_pyramid: List[tf.Tensor]) -> List[tf.Tensor]:
|
122 |
+
"""Warps the feature pyramid using the flow pyramid.
|
123 |
+
|
124 |
+
Args:
|
125 |
+
feature_pyramid: feature pyramid starting from the finest level.
|
126 |
+
flow_pyramid: flow fields, starting from the finest level.
|
127 |
+
|
128 |
+
Returns:
|
129 |
+
Reverse warped feature pyramid.
|
130 |
+
"""
|
131 |
+
warped_feature_pyramid = []
|
132 |
+
for features, flow in zip(feature_pyramid, flow_pyramid):
|
133 |
+
warped_feature_pyramid.append(warp(features, flow))
|
134 |
+
return warped_feature_pyramid
|
135 |
+
|
136 |
+
|
137 |
+
def concatenate_pyramids(pyramid1: List[tf.Tensor],
|
138 |
+
pyramid2: List[tf.Tensor]) -> List[tf.Tensor]:
|
139 |
+
"""Concatenates each pyramid level together in the channel dimension."""
|
140 |
+
result = []
|
141 |
+
for features1, features2 in zip(pyramid1, pyramid2):
|
142 |
+
result.append(tf.concat([features1, features2], axis=-1))
|
143 |
+
return result
|
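The transpose trick in multiply_pyramid above can be checked in isolation; a minimal sketch, not part of the repo:

import tensorflow as tf

images = tf.ones([2, 4, 4, 3])      # B x H x W x C
scalars = tf.constant([0.5, 2.0])   # one scalar per batch element
# Move the batch axis last so broadcasting pairs each image with its scalar,
# then transpose back to B x H x W x C.
scaled = tf.transpose(tf.transpose(images, [3, 1, 2, 0]) * scalars, [3, 1, 2, 0])
print(scaled[0, 0, 0, 0].numpy(), scaled[1, 0, 0, 0].numpy())  # 0.5 2.0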
moment.gif
ADDED
Git LFS Details
|
photos/one.png
ADDED
Git LFS Details
|
photos/two.png
ADDED
Git LFS Details
|
predict.py
ADDED
@@ -0,0 +1,88 @@
1 |
+
import os
|
2 |
+
from pathlib import Path
|
3 |
+
import numpy as np
|
4 |
+
import tempfile
|
5 |
+
import tensorflow as tf
|
6 |
+
import mediapy
|
7 |
+
from PIL import Image
|
8 |
+
import cog
|
9 |
+
|
10 |
+
from eval import interpolator, util
|
11 |
+
|
12 |
+
_UINT8_MAX_F = float(np.iinfo(np.uint8).max)
|
13 |
+
|
14 |
+
|
15 |
+
class Predictor(cog.Predictor):
|
16 |
+
def setup(self):
|
17 |
+
import tensorflow as tf
|
18 |
+
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
|
19 |
+
self.interpolator = interpolator.Interpolator("pretrained_models/film_net/Style/saved_model", None)
|
20 |
+
|
21 |
+
# Batched time.
|
22 |
+
self.batch_dt = np.full(shape=(1,), fill_value=0.5, dtype=np.float32)
|
23 |
+
|
24 |
+
@cog.input(
|
25 |
+
"frame1",
|
26 |
+
type=Path,
|
27 |
+
help="The first input frame",
|
28 |
+
)
|
29 |
+
@cog.input(
|
30 |
+
"frame2",
|
31 |
+
type=Path,
|
32 |
+
help="The second input frame",
|
33 |
+
)
|
34 |
+
@cog.input(
|
35 |
+
"times_to_interpolate",
|
36 |
+
type=int,
|
37 |
+
default=1,
|
38 |
+
min=1,
|
39 |
+
max=8,
|
40 |
+
help="Controls the number of times the frame interpolator is invoked If set to 1, the output will be the "
|
41 |
+
"sub-frame at t=0.5; when set to > 1, the output will be the interpolation video with "
|
42 |
+
"(2^times_to_interpolate + 1) frames, fps of 30.",
|
43 |
+
)
|
44 |
+
def predict(self, frame1, frame2, times_to_interpolate):
|
45 |
+
INPUT_EXT = ['.png', '.jpg', '.jpeg']
|
46 |
+
assert os.path.splitext(str(frame1))[-1] in INPUT_EXT and os.path.splitext(str(frame2))[-1] in INPUT_EXT, \
|
47 |
+
"Please provide png, jpg or jpeg images."
|
48 |
+
|
49 |
+
# make sure 2 images are the same size
|
50 |
+
img1 = Image.open(str(frame1))
|
51 |
+
img2 = Image.open(str(frame2))
|
52 |
+
if not img1.size == img2.size:
|
53 |
+
img1 = img1.crop((0, 0, min(img1.size[0], img2.size[0]), min(img1.size[1], img2.size[1])))
|
54 |
+
img2 = img2.crop((0, 0, min(img1.size[0], img2.size[0]), min(img1.size[1], img2.size[1])))
|
55 |
+
frame1 = 'new_frame1.png'
|
56 |
+
frame2 = 'new_frame2.png'
|
57 |
+
img1.save(frame1)
|
58 |
+
img2.save(frame2)
|
59 |
+
|
60 |
+
if times_to_interpolate == 1:
|
61 |
+
# First batched image.
|
62 |
+
image_1 = util.read_image(str(frame1))
|
63 |
+
image_batch_1 = np.expand_dims(image_1, axis=0)
|
64 |
+
|
65 |
+
# Second batched image.
|
66 |
+
image_2 = util.read_image(str(frame2))
|
67 |
+
image_batch_2 = np.expand_dims(image_2, axis=0)
|
68 |
+
|
69 |
+
# Invoke the model once.
|
70 |
+
|
71 |
+
mid_frame = self.interpolator.interpolate(image_batch_1, image_batch_2, self.batch_dt)[0]
|
72 |
+
out_path = Path(tempfile.mkdtemp()) / "out.png"
|
73 |
+
util.write_image(str(out_path), mid_frame)
|
74 |
+
return out_path
|
75 |
+
|
76 |
+
|
77 |
+
input_frames = [str(frame1), str(frame2)]
|
78 |
+
|
79 |
+
frames = list(
|
80 |
+
util.interpolate_recursively_from_files(
|
81 |
+
input_frames, times_to_interpolate, self.interpolator))
|
82 |
+
print('Interpolated frames generated, saving now as output video.')
|
83 |
+
|
84 |
+
ffmpeg_path = util.get_ffmpeg_path()
|
85 |
+
mediapy.set_ffmpeg(ffmpeg_path)
|
86 |
+
out_path = Path(tempfile.mkdtemp()) / "out.mp4"
|
87 |
+
mediapy.write_video(str(out_path), frames, fps=30)
|
88 |
+
return out_path
|
requirements.txt
ADDED
@@ -0,0 +1,14 @@
1 |
+
# Docker base image: `gcr.io/deeplearning-platform-release/tf2-gpu.2-6:latest`
|
2 |
+
tensorflow==2.6.2 # The latest should include tensorflow-gpu
|
3 |
+
tensorflow-datasets==4.4.0
|
4 |
+
tensorflow-addons==0.15.0
|
5 |
+
absl-py==0.12.0
|
6 |
+
gin-config==0.5.0
|
7 |
+
parameterized==0.8.1
|
8 |
+
mediapy==1.0.3
|
9 |
+
scikit-image==0.19.1
|
10 |
+
apache-beam==2.34.0
|
11 |
+
google-cloud-bigquery-storage==1.1.0 # Suppresses a harmless error from beam
|
12 |
+
natsort==8.1.0
|
13 |
+
gdown==4.5.4
|
14 |
+
tqdm==4.64.1
|
training/.DS_Store
ADDED
Binary file (6.15 kB).
|
|
training/augmentation_lib.py
ADDED
@@ -0,0 +1,220 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Dataset augmentation for frame interpolation."""
|
16 |
+
from typing import Callable, Dict, List
|
17 |
+
|
18 |
+
import gin.tf
|
19 |
+
import numpy as np
|
20 |
+
import tensorflow as tf
|
21 |
+
import tensorflow.math as tfm
|
22 |
+
import tensorflow_addons.image as tfa_image
|
23 |
+
|
24 |
+
_PI = 3.141592653589793
|
25 |
+
|
26 |
+
|
27 |
+
def _rotate_flow_vectors(flow: tf.Tensor, angle_rad: float) -> tf.Tensor:
|
28 |
+
r"""Rotate the (u,v) vector of each pixel with angle in radians.
|
29 |
+
|
30 |
+
Flow matrix system of coordinates.
|
31 |
+
. . . . u (x)
|
32 |
+
.
|
33 |
+
.
|
34 |
+
. v (-y)
|
35 |
+
|
36 |
+
Rotation system of coordinates.
|
37 |
+
. y
|
38 |
+
.
|
39 |
+
.
|
40 |
+
. . . . x
|
41 |
+
Args:
|
42 |
+
flow: Flow map which has been image-rotated.
|
43 |
+
angle_rad: The rotation angle in radians.
|
44 |
+
|
45 |
+
Returns:
|
46 |
+
A flow with the same map but each (u,v) vector rotated by angle_rad.
|
47 |
+
"""
|
48 |
+
u, v = tf.split(flow, 2, axis=-1)
|
49 |
+
# rotu = u * cos(angle) - (-v) * sin(angle)
|
50 |
+
rot_u = tfm.cos(angle_rad) * u + tfm.sin(angle_rad) * v
|
51 |
+
# rotv = -(u * sin(theta) + (-v) * cos(theta))
|
52 |
+
rot_v = -tfm.sin(angle_rad) * u + tfm.cos(angle_rad) * v
|
53 |
+
return tf.concat((rot_u, rot_v), axis=-1)
|
54 |
+
|
55 |
+
|
56 |
+
def flow_rot90(flow: tf.Tensor, k: int) -> tf.Tensor:
|
57 |
+
"""Rotates a flow by a multiple of 90 degrees.
|
58 |
+
|
59 |
+
Args:
|
60 |
+
flow: The flow image shaped (H, W, 2) to rotate by multiples of 90 degrees.
|
61 |
+
k: The multiplier factor.
|
62 |
+
|
63 |
+
Returns:
|
64 |
+
A flow image of the same shape as the input rotated by multiples of 90
|
65 |
+
degrees.
|
66 |
+
"""
|
67 |
+
angle_rad = tf.cast(k, dtype=tf.float32) * 90. * (_PI/180.)
|
68 |
+
flow = tf.image.rot90(flow, k)
|
69 |
+
return _rotate_flow_vectors(flow, angle_rad)
|
70 |
+
|
71 |
+
|
72 |
+
def rotate_flow(flow: tf.Tensor, angle_rad: float) -> tf.Tensor:
|
73 |
+
"""Rotates a flow by a the provided angle in radians.
|
74 |
+
|
75 |
+
Args:
|
76 |
+
flow: The flow image shaped (H, W, 2) to rotate by the provided angle.
|
77 |
+
angle_rad: The angle to rotate the flow by, in radians.
|
78 |
+
|
79 |
+
Returns:
|
80 |
+
A flow image of the same shape as the input rotated by the provided angle in
|
81 |
+
radians.
|
82 |
+
"""
|
83 |
+
flow = tfa_image.rotate(
|
84 |
+
flow,
|
85 |
+
angles=angle_rad,
|
86 |
+
interpolation='bilinear',
|
87 |
+
fill_mode='reflect')
|
88 |
+
return _rotate_flow_vectors(flow, angle_rad)
|
89 |
+
|
90 |
+
|
91 |
+
def flow_flip(flow: tf.Tensor) -> tf.Tensor:
|
92 |
+
"""Flips a flow left to right.
|
93 |
+
|
94 |
+
Args:
|
95 |
+
flow: The flow image shaped (H, W, 2) to flip left to right.
|
96 |
+
|
97 |
+
Returns:
|
98 |
+
A flow image of the same shape as the input flipped left to right.
|
99 |
+
"""
|
100 |
+
flow = tf.image.flip_left_right(tf.identity(flow))
|
101 |
+
flow_u, flow_v = tf.split(flow, 2, axis=-1)
|
102 |
+
return tf.concat([-1 * flow_u, flow_v], axis=-1)  # concat keeps the (H, W, 2) shape
|
103 |
+
|
104 |
+
|
105 |
+
def random_image_rot90(images: Dict[str, tf.Tensor]) -> Dict[str, tf.Tensor]:
|
106 |
+
"""Rotates a stack of images by a random multiples of 90 degrees.
|
107 |
+
|
108 |
+
Args:
|
109 |
+
images: A tf.Tensor shaped (H, W, num_channels) of images stacked along the
|
110 |
+
channel's axis.
|
111 |
+
Returns:
|
112 |
+
A tf.Tensor of the same rank as the `images` after random rotation by
|
113 |
+
multiples of 90 degrees applied counter-clockwise.
|
114 |
+
"""
|
115 |
+
random_k = tf.random.uniform((), minval=0, maxval=4, dtype=tf.int32)
|
116 |
+
for key in images:
|
117 |
+
images[key] = tf.image.rot90(images[key], k=random_k)
|
118 |
+
return images
|
119 |
+
|
120 |
+
|
121 |
+
def random_flip(images: Dict[str, tf.Tensor]) -> Dict[str, tf.Tensor]:
|
122 |
+
"""Flips a stack of images randomly.
|
123 |
+
|
124 |
+
Args:
|
125 |
+
images: A tf.Tensor shaped (H, W, num_channels) of images stacked along the
|
126 |
+
channel's axis.
|
127 |
+
|
128 |
+
Returns:
|
129 |
+
A tf.Tensor of the images after random left to right flip.
|
130 |
+
"""
|
131 |
+
prob = tf.random.uniform((), minval=0, maxval=2, dtype=tf.int32)
|
132 |
+
prob = tf.cast(prob, tf.bool)
|
133 |
+
|
134 |
+
def _identity(image):
|
135 |
+
return image
|
136 |
+
|
137 |
+
def _flip_left_right(image):
|
138 |
+
return tf.image.flip_left_right(image)
|
139 |
+
|
140 |
+
# pylint: disable=cell-var-from-loop
|
141 |
+
for key in images:
|
142 |
+
images[key] = tf.cond(prob, lambda: _flip_left_right(images[key]),
|
143 |
+
lambda: _identity(images[key]))
|
144 |
+
return images
|
145 |
+
|
146 |
+
|
147 |
+
def random_reverse(images: Dict[str, tf.Tensor]) -> Dict[str, tf.Tensor]:
|
148 |
+
"""Reverses a stack of images randomly.
|
149 |
+
|
150 |
+
Args:
|
151 |
+
images: A dictionary of tf.Tensors, each shaped (H, W, num_channels), with
|
152 |
+
each tensor being a stack of images along the last channel axis.
|
153 |
+
|
154 |
+
Returns:
|
155 |
+
A dictionary of tf.Tensors, each shaped the same as the input images dict.
|
156 |
+
"""
|
157 |
+
prob = tf.random.uniform((), minval=0, maxval=2, dtype=tf.int32)
|
158 |
+
prob = tf.cast(prob, tf.bool)
|
159 |
+
|
160 |
+
def _identity(images):
|
161 |
+
return images
|
162 |
+
|
163 |
+
def _reverse(images):
|
164 |
+
images['x0'], images['x1'] = images['x1'], images['x0']
|
165 |
+
return images
|
166 |
+
|
167 |
+
return tf.cond(prob, lambda: _reverse(images), lambda: _identity(images))
|
168 |
+
|
169 |
+
|
170 |
+
def random_rotate(images: Dict[str, tf.Tensor]) -> Dict[str, tf.Tensor]:
|
171 |
+
"""Rotates image randomly with [-45 to 45 degrees].
|
172 |
+
|
173 |
+
Args:
|
174 |
+
images: A tf.Tensor shaped (H, W, num_channels) of images stacked along the
|
175 |
+
channel's axis.
|
176 |
+
|
177 |
+
Returns:
|
178 |
+
A tf.Tensor of the images after random rotation within a bound of -45 to 45
|
179 |
+
degrees.
|
180 |
+
"""
|
181 |
+
prob = tf.random.uniform((), minval=0, maxval=2, dtype=tf.int32)
|
182 |
+
prob = tf.cast(prob, tf.float32)
|
183 |
+
random_angle = tf.random.uniform((),
|
184 |
+
minval=-0.25 * np.pi,
|
185 |
+
maxval=0.25 * np.pi,
|
186 |
+
dtype=tf.float32)
|
187 |
+
|
188 |
+
for key in images:
|
189 |
+
images[key] = tfa_image.rotate(
|
190 |
+
images[key],
|
191 |
+
angles=random_angle * prob,
|
192 |
+
interpolation='bilinear',
|
193 |
+
fill_mode='constant')
|
194 |
+
return images
|
195 |
+
|
196 |
+
|
197 |
+
@gin.configurable('data_augmentation')
|
198 |
+
def data_augmentations(
|
199 |
+
names: List[str]) -> Dict[str, Callable[..., tf.Tensor]]:
|
200 |
+
"""Creates the data augmentation functions.
|
201 |
+
|
202 |
+
Args:
|
203 |
+
names: The list of augmentation function names.
|
204 |
+
Returns:
|
205 |
+
A dictionary of Callables to the augmentation functions, keyed by their
|
206 |
+
names.
|
207 |
+
"""
|
208 |
+
augmentations = dict()
|
209 |
+
for name in names:
|
210 |
+
if name == 'random_image_rot90':
|
211 |
+
augmentations[name] = random_image_rot90
|
212 |
+
elif name == 'random_rotate':
|
213 |
+
augmentations[name] = random_rotate
|
214 |
+
elif name == 'random_flip':
|
215 |
+
augmentations[name] = random_flip
|
216 |
+
elif name == 'random_reverse':
|
217 |
+
augmentations[name] = random_reverse
|
218 |
+
else:
|
219 |
+
raise AttributeError('Invalid augmentation function %s' % name)
|
220 |
+
return augmentations
|
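The flow-vector rotation used by flow_rot90 and rotate_flow above can be sanity-checked with plain NumPy; a small sketch (not part of the repo): a purely rightward flow (u=1, v=0) rotated 90 degrees counter-clockwise should point up, i.e. (0, -1) in image coordinates where v points down.

import numpy as np

theta = np.pi / 2
u, v = 1.0, 0.0
rot_u = np.cos(theta) * u + np.sin(theta) * v    # same formulas as above
rot_v = -np.sin(theta) * u + np.cos(theta) * v
print(round(rot_u, 6), round(rot_v, 6))          # 0.0 -1.0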
training/build_saved_model_cli.py
ADDED
@@ -0,0 +1,98 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
r"""Converts TF2 training checkpoint to a saved model.
|
16 |
+
|
17 |
+
The model must match the checkpoint, so the gin config must be given.
|
18 |
+
|
19 |
+
Usage example:
|
20 |
+
python3 -m frame_interpolation.training.build_saved_model_cli \
|
21 |
+
--gin_config <filepath of the gin config the training session was based on> \
|
22 |
+
--base_folder <base folder of training sessions> \
|
23 |
+
--label <the name of the run>
|
24 |
+
|
25 |
+
This will produce a saved model into: <base_folder>/<label>/saved_model
|
26 |
+
"""
|
27 |
+
import os
|
28 |
+
from typing import Sequence
|
29 |
+
|
30 |
+
from . import model_lib
|
31 |
+
from absl import app
|
32 |
+
from absl import flags
|
33 |
+
from absl import logging
|
34 |
+
import gin.tf
|
35 |
+
import tensorflow as tf
|
36 |
+
tf.get_logger().setLevel('ERROR')
|
37 |
+
|
38 |
+
_GIN_CONFIG = flags.DEFINE_string(
|
39 |
+
name='gin_config',
|
40 |
+
default='config.gin',
|
41 |
+
help='Gin config file, saved in the training session <root folder>.')
|
42 |
+
_LABEL = flags.DEFINE_string(
|
43 |
+
name='label',
|
44 |
+
default=None,
|
45 |
+
required=True,
|
46 |
+
help='Descriptive label for the training session.')
|
47 |
+
_BASE_FOLDER = flags.DEFINE_string(
|
48 |
+
name='base_folder',
|
49 |
+
default=None,
|
50 |
+
help='Path to all training sessions.')
|
51 |
+
_MODE = flags.DEFINE_enum(
|
52 |
+
name='mode',
|
53 |
+
default=None,
|
54 |
+
enum_values=['cpu', 'gpu', 'tpu'],
|
55 |
+
help='Distributed strategy approach.')
|
56 |
+
|
57 |
+
|
58 |
+
def _build_saved_model(checkpoint_path: str, config_files: Sequence[str],
|
59 |
+
output_model_path: str):
|
60 |
+
"""Builds a saved model based on the checkpoint directory."""
|
61 |
+
gin.parse_config_files_and_bindings(
|
62 |
+
config_files=config_files,
|
63 |
+
bindings=None,
|
64 |
+
skip_unknown=True)
|
65 |
+
model = model_lib.create_model()
|
66 |
+
checkpoint = tf.train.Checkpoint(model=model)
|
67 |
+
checkpoint_file = tf.train.latest_checkpoint(checkpoint_path)
|
68 |
+
try:
|
69 |
+
logging.info('Restoring from %s', checkpoint_file)
|
70 |
+
status = checkpoint.restore(checkpoint_file)
|
71 |
+
status.assert_existing_objects_matched()
|
72 |
+
status.expect_partial()
|
73 |
+
model.save(output_model_path)
|
74 |
+
except (tf.errors.NotFoundError, AssertionError) as err:
|
75 |
+
logging.info('Failed to restore checkpoint from %s. Error:\n%s',
|
76 |
+
checkpoint_file, err)
|
77 |
+
|
78 |
+
|
79 |
+
def main(argv):
|
80 |
+
if len(argv) > 1:
|
81 |
+
raise app.UsageError('Too many command-line arguments.')
|
82 |
+
|
83 |
+
checkpoint_path = os.path.join(_BASE_FOLDER.value, _LABEL.value, 'train')
|
84 |
+
if not tf.io.gfile.exists(_GIN_CONFIG.value):
|
85 |
+
config_file = os.path.join(_BASE_FOLDER.value, _LABEL.value,
|
86 |
+
_GIN_CONFIG.value)
|
87 |
+
else:
|
88 |
+
config_file = _GIN_CONFIG.value
|
89 |
+
output_model_path = os.path.join(_BASE_FOLDER.value, _LABEL.value,
|
90 |
+
'saved_model')
|
91 |
+
_build_saved_model(
|
92 |
+
checkpoint_path=checkpoint_path,
|
93 |
+
config_files=[config_file],
|
94 |
+
output_model_path=output_model_path)
|
95 |
+
logging.info('The saved model is stored in %s/.', output_model_path)
|
96 |
+
|
97 |
+
if __name__ == '__main__':
|
98 |
+
app.run(main)
|
training/config/film_net-L1.gin
ADDED
@@ -0,0 +1,55 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
model.name = 'film_net'
|
16 |
+
|
17 |
+
film_net.pyramid_levels = 7
|
18 |
+
film_net.fusion_pyramid_levels = 5
|
19 |
+
film_net.specialized_levels = 3
|
20 |
+
film_net.sub_levels = 4
|
21 |
+
film_net.flow_convs = [3, 3, 3, 3]
|
22 |
+
film_net.flow_filters = [32, 64, 128, 256]
|
23 |
+
film_net.filters = 64
|
24 |
+
|
25 |
+
training.learning_rate = 0.0001
|
26 |
+
training.learning_rate_decay_steps = 750000
|
27 |
+
training.learning_rate_decay_rate = 0.464158
|
28 |
+
training.learning_rate_staircase = True
|
29 |
+
training.num_steps = 3000000
|
30 |
+
|
31 |
+
# in the sweep
|
32 |
+
training_dataset.file = 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/vimeo_interp_train.tfrecord@200'
|
33 |
+
training_dataset.batch_size = 8
|
34 |
+
training_dataset.crop_size = 256
|
35 |
+
|
36 |
+
eval_datasets.batch_size = 1
|
37 |
+
eval_datasets.max_examples = -1
|
38 |
+
# eval_datasets.files = ['gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/vimeo_interp_test.tfrecord@3',
|
39 |
+
# 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/middlebury_other.tfrecord@3',
|
40 |
+
# 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/UCF101_interp_test.tfrecord@2',
|
41 |
+
# 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/xiph_2K.tfrecord@2',
|
42 |
+
# 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/xiph_4K.tfrecord@2']
|
43 |
+
# eval_datasets.names = ['vimeo90K', 'middlebury', 'ucf101', 'xiph2K', 'xiph4K']
|
44 |
+
eval_datasets.files = []
|
45 |
+
eval_datasets.names = []
|
46 |
+
|
47 |
+
# Training augmentation (in addition to random crop)
|
48 |
+
data_augmentation.names = ['random_image_rot90', 'random_flip', 'random_rotate', 'random_reverse']
|
49 |
+
|
50 |
+
# Loss functions
|
51 |
+
training_losses.loss_names = ['l1']
|
52 |
+
training_losses.loss_weights = [1.0]
|
53 |
+
|
54 |
+
test_losses.loss_names = ['l1', 'psnr', 'ssim']
|
55 |
+
test_losses.loss_weights = [1.0, 1.0, 1.0]
|
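The learning-rate settings above describe a staircase exponential decay; a sketch of the equivalent Keras schedule (whether train_lib.py wires it exactly this way is an assumption here):

import tensorflow as tf

lr = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-4,
    decay_steps=750000,
    decay_rate=0.464158,
    staircase=True)
print(lr(0).numpy(), lr(750000).numpy(), lr(1500000).numpy())
# ~1.0e-4  ~4.64e-5  ~2.15e-5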
training/config/film_net-Style.gin
ADDED
@@ -0,0 +1,66 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
model.name = 'film_net'
|
16 |
+
|
17 |
+
film_net.pyramid_levels = 7
|
18 |
+
film_net.fusion_pyramid_levels = 5
|
19 |
+
film_net.specialized_levels = 3
|
20 |
+
film_net.sub_levels = 4
|
21 |
+
film_net.flow_convs = [3, 3, 3, 3]
|
22 |
+
film_net.flow_filters = [32, 64, 128, 256]
|
23 |
+
film_net.filters = 64
|
24 |
+
|
25 |
+
training.learning_rate = 0.0001
|
26 |
+
training.learning_rate_decay_steps = 750000
|
27 |
+
training.learning_rate_decay_rate = 0.464158
|
28 |
+
training.learning_rate_staircase = True
|
29 |
+
training.num_steps = 3000000
|
30 |
+
|
31 |
+
# in the sweep
|
32 |
+
training_dataset.file = 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/vimeo_interp_train.tfrecord@200'
|
33 |
+
training_dataset.batch_size = 8
|
34 |
+
training_dataset.crop_size = 256
|
35 |
+
|
36 |
+
eval_datasets.batch_size = 1
|
37 |
+
eval_datasets.max_examples = -1
|
38 |
+
# eval_datasets.files = ['gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/vimeo_interp_test.tfrecord@3',
|
39 |
+
# 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/middlebury_other.tfrecord@3',
|
40 |
+
# 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/UCF101_interp_test.tfrecord@2',
|
41 |
+
# 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/xiph_2K.tfrecord@2',
|
42 |
+
# 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/xiph_4K.tfrecord@2']
|
43 |
+
# eval_datasets.names = ['vimeo90K', 'middlebury', 'ucf101', 'xiph2K', 'xiph4K']
|
44 |
+
eval_datasets.files = []
|
45 |
+
eval_datasets.names = []
|
46 |
+
|
47 |
+
# Training augmentation (in addition to random crop)
|
48 |
+
data_augmentation.names = ['random_image_rot90', 'random_flip', 'random_rotate', 'random_reverse']
|
49 |
+
|
50 |
+
# Loss functions
|
51 |
+
training_losses.loss_names = ['l1', 'vgg', 'style']
|
52 |
+
training_losses.loss_weight_schedules = [
|
53 |
+
@tf.keras.optimizers.schedules.PiecewiseConstantDecay,
|
54 |
+
@tf.keras.optimizers.schedules.PiecewiseConstantDecay,
|
55 |
+
@tf.keras.optimizers.schedules.PiecewiseConstantDecay]
|
56 |
+
# Increase the weight of style loss at 1.5M steps.
|
57 |
+
training_losses.loss_weight_parameters = [
|
58 |
+
{'boundaries':[0], 'values':[1.0, 1.0]},
|
59 |
+
{'boundaries':[1500000], 'values':[1.0, 0.25]},
|
60 |
+
{'boundaries':[1500000], 'values':[0.0, 40.0]}]
|
61 |
+
|
62 |
+
test_losses.loss_names = ['l1', 'psnr', 'ssim']
|
63 |
+
test_losses.loss_weights = [1.0, 1.0, 1.0]
|
64 |
+
|
65 |
+
vgg.vgg_model_file = 'gs://xcloud-shared/fitsumreda/frame_interpolation/pretrained_models/vgg/imagenet-vgg-verydeep-19.mat'
|
66 |
+
style.vgg_model_file = 'gs://xcloud-shared/fitsumreda/frame_interpolation/pretrained_models/vgg/imagenet-vgg-verydeep-19.mat'
|
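The PiecewiseConstantDecay entries above act as step-dependent loss weights; for example, the third schedule keeps the style weight at 0.0 until step 1.5M and then raises it to 40.0. A minimal sketch of that behavior:

import tensorflow as tf

style_weight = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=[1500000], values=[0.0, 40.0])
print(style_weight(0).numpy(), style_weight(2000000).numpy())  # 0.0 40.0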
training/config/film_net-VGG.gin
ADDED
@@ -0,0 +1,64 @@
# Copyright 2022 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
model.name = 'film_net'

film_net.pyramid_levels = 7
film_net.fusion_pyramid_levels = 5
film_net.specialized_levels = 3
film_net.sub_levels = 4
film_net.flow_convs = [3, 3, 3, 3]
film_net.flow_filters = [32, 64, 128, 256]
film_net.filters = 64

training.learning_rate = 0.0001
training.learning_rate_decay_steps = 750000
training.learning_rate_decay_rate = 0.464158
training.learning_rate_staircase = True
training.num_steps = 3000000

# in the sweep
training_dataset.file = 'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/vimeo_interp_train.tfrecord@200'
training_dataset.batch_size = 8
training_dataset.crop_size = 256

eval_datasets.batch_size = 1
eval_datasets.max_examples = -1
# eval_datasets.files = ['gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/vimeo_interp_test.tfrecord@3',
#                        'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/middlebury_other.tfrecord@3',
#                        'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/UCF101_interp_test.tfrecord@2',
#                        'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/xiph_2K.tfrecord@2',
#                        'gs://xcloud-shared/fitsumreda/frame_interpolation/datasets/xiph_4K.tfrecord@2']
# eval_datasets.names = ['vimeo90K', 'middlebury', 'ucf101', 'xiph2K', 'xiph4K']
eval_datasets.files = []
eval_datasets.names = []

# Training augmentation (in addition to random crop)
data_augmentation.names = ['random_image_rot90', 'random_flip', 'random_rotate', 'random_reverse']

# Loss functions
training_losses.loss_names = ['l1', 'vgg']
training_losses.loss_weight_schedules = [
    @tf.keras.optimizers.schedules.PiecewiseConstantDecay,
    @tf.keras.optimizers.schedules.PiecewiseConstantDecay]

# Decrease the weight of VGG loss at 1.5M steps.
training_losses.loss_weight_parameters = [
    {'boundaries':[0], 'values':[1.0, 1.0]},
    {'boundaries':[1500000], 'values':[1.0, 0.25]}]

test_losses.loss_names = ['l1', 'psnr', 'ssim']
test_losses.loss_weights = [1.0, 1.0, 1.0]

vgg.vgg_model_file = 'gs://xcloud-shared/fitsumreda/frame_interpolation/pretrained_models/vgg/imagenet-vgg-verydeep-19.mat'
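All three training configs share the same `training.learning_rate*` settings, which describe an exponential decay. A small sketch of the schedule they imply; note that 0.464158 ≈ 10^(-1/3), so with `staircase = True` the learning rate drops by roughly 10x every three decay periods (2.25M steps):

```python
import tensorflow as tf

# Sketch of the learning-rate schedule implied by the gin values above.
lr = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-4,
    decay_steps=750_000,
    decay_rate=0.464158,
    staircase=True)

for step in (0, 750_000, 1_500_000, 2_250_000):
  print(step, float(lr(step)))  # 1e-4, ~4.6e-5, ~2.2e-5, ~1e-5
```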
training/data_lib.py
ADDED
@@ -0,0 +1,296 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Dataset creation for frame interpolation."""
|
16 |
+
from typing import Callable, Dict, List, Optional
|
17 |
+
|
18 |
+
from absl import logging
|
19 |
+
import gin.tf
|
20 |
+
import tensorflow as tf
|
21 |
+
|
22 |
+
|
23 |
+
def _create_feature_map() -> Dict[str, tf.io.FixedLenFeature]:
|
24 |
+
"""Creates the feature map for extracting the frame triplet."""
|
25 |
+
feature_map = {
|
26 |
+
'frame_0/encoded':
|
27 |
+
tf.io.FixedLenFeature((), tf.string, default_value=''),
|
28 |
+
'frame_0/format':
|
29 |
+
tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
|
30 |
+
'frame_0/height':
|
31 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
32 |
+
'frame_0/width':
|
33 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
34 |
+
'frame_1/encoded':
|
35 |
+
tf.io.FixedLenFeature((), tf.string, default_value=''),
|
36 |
+
'frame_1/format':
|
37 |
+
tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
|
38 |
+
'frame_1/height':
|
39 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
40 |
+
'frame_1/width':
|
41 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
42 |
+
'frame_2/encoded':
|
43 |
+
tf.io.FixedLenFeature((), tf.string, default_value=''),
|
44 |
+
'frame_2/format':
|
45 |
+
tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
|
46 |
+
'frame_2/height':
|
47 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
48 |
+
'frame_2/width':
|
49 |
+
tf.io.FixedLenFeature((), tf.int64, default_value=0),
|
50 |
+
'path':
|
51 |
+
tf.io.FixedLenFeature((), tf.string, default_value=''),
|
52 |
+
}
|
53 |
+
return feature_map
|
54 |
+
|
55 |
+
|
56 |
+
def _parse_example(sample):
|
57 |
+
"""Parses a serialized sample.
|
58 |
+
|
59 |
+
Args:
|
60 |
+
sample: A serialized tf.Example to be parsed.
|
61 |
+
|
62 |
+
Returns:
|
63 |
+
dictionary containing the following:
|
64 |
+
encoded_image
|
65 |
+
image_height
|
66 |
+
image_width
|
67 |
+
"""
|
68 |
+
feature_map = _create_feature_map()
|
69 |
+
features = tf.io.parse_single_example(sample, feature_map)
|
70 |
+
output_dict = {
|
71 |
+
'x0': tf.io.decode_image(features['frame_0/encoded'], dtype=tf.float32),
|
72 |
+
'x1': tf.io.decode_image(features['frame_2/encoded'], dtype=tf.float32),
|
73 |
+
'y': tf.io.decode_image(features['frame_1/encoded'], dtype=tf.float32),
|
74 |
+
# The fractional time value of frame_1 is not included in our tfrecords,
|
75 |
+
# but is always at 0.5. The model will expect this to be specified, so
|
76 |
+
# we insert it here.
|
77 |
+
'time': 0.5,
|
78 |
+
# Store the original mid frame filepath for identifying examples.
|
79 |
+
'path': features['path'],
|
80 |
+
}
|
81 |
+
|
82 |
+
return output_dict
|
83 |
+
|
84 |
+
|
85 |
+
def _random_crop_images(crop_size: int, images: tf.Tensor,
|
86 |
+
total_channel_size: int) -> tf.Tensor:
|
87 |
+
"""Crops the tensor with random offset to the given size."""
|
88 |
+
if crop_size > 0:
|
89 |
+
crop_shape = tf.constant([crop_size, crop_size, total_channel_size])
|
90 |
+
images = tf.image.random_crop(images, crop_shape)
|
91 |
+
return images
|
92 |
+
|
93 |
+
|
94 |
+
def crop_example(example: tf.Tensor, crop_size: int,
|
95 |
+
crop_keys: Optional[List[str]] = None):
|
96 |
+
"""Random crops selected images in the example to given size and keys.
|
97 |
+
|
98 |
+
Args:
|
99 |
+
example: Input tensor representing images to be cropped.
|
100 |
+
crop_size: The size to crop images to. This value is used for both
|
101 |
+
height and width.
|
102 |
+
crop_keys: The images in the input example to crop.
|
103 |
+
|
104 |
+
Returns:
|
105 |
+
Example with cropping applied to selected images.
|
106 |
+
"""
|
107 |
+
if crop_keys is None:
|
108 |
+
crop_keys = ['x0', 'x1', 'y']
|
109 |
+
channels = [3, 3, 3]
|
110 |
+
|
111 |
+
# Stack images along channel axis, and perform a random crop once.
|
112 |
+
image_to_crop = [example[key] for key in crop_keys]
|
113 |
+
stacked_images = tf.concat(image_to_crop, axis=-1)
|
114 |
+
cropped_images = _random_crop_images(crop_size, stacked_images, sum(channels))
|
115 |
+
cropped_images = tf.split(
|
116 |
+
cropped_images, num_or_size_splits=channels, axis=-1)
|
117 |
+
for key, cropped_image in zip(crop_keys, cropped_images):
|
118 |
+
example[key] = cropped_image
|
119 |
+
return example
|
120 |
+
|
121 |
+
|
122 |
+
def apply_data_augmentation(
|
123 |
+
augmentation_fns: Dict[str, Callable[..., tf.Tensor]],
|
124 |
+
example: tf.Tensor,
|
125 |
+
augmentation_keys: Optional[List[str]] = None) -> tf.Tensor:
|
126 |
+
"""Applies random augmentation in succession to selected image keys.
|
127 |
+
|
128 |
+
Args:
|
129 |
+
augmentation_fns: A Dict of Callables to data augmentation functions.
|
130 |
+
example: Input tensor representing images to be augmented.
|
131 |
+
augmentation_keys: The images in the input example to augment.
|
132 |
+
|
133 |
+
Returns:
|
134 |
+
Example with augmentation applied to selected images.
|
135 |
+
"""
|
136 |
+
if augmentation_keys is None:
|
137 |
+
augmentation_keys = ['x0', 'x1', 'y']
|
138 |
+
|
139 |
+
# Apply each augmentation in sequence
|
140 |
+
augmented_images = {key: example[key] for key in augmentation_keys}
|
141 |
+
for augmentation_function in augmentation_fns.values():
|
142 |
+
augmented_images = augmentation_function(augmented_images)
|
143 |
+
|
144 |
+
for key in augmentation_keys:
|
145 |
+
example[key] = augmented_images[key]
|
146 |
+
return example
|
147 |
+
|
148 |
+
|
149 |
+
def _create_from_tfrecord(batch_size, file, augmentation_fns,
|
150 |
+
crop_size) -> tf.data.Dataset:
|
151 |
+
"""Creates a dataset from TFRecord."""
|
152 |
+
dataset = tf.data.TFRecordDataset(file)
|
153 |
+
dataset = dataset.map(
|
154 |
+
_parse_example, num_parallel_calls=tf.data.experimental.AUTOTUNE)
|
155 |
+
|
156 |
+
# Perform data_augmentation before cropping and batching
|
157 |
+
if augmentation_fns is not None:
|
158 |
+
dataset = dataset.map(
|
159 |
+
lambda x: apply_data_augmentation(augmentation_fns, x),
|
160 |
+
num_parallel_calls=tf.data.experimental.AUTOTUNE)
|
161 |
+
|
162 |
+
if crop_size > 0:
|
163 |
+
dataset = dataset.map(
|
164 |
+
lambda x: crop_example(x, crop_size=crop_size),
|
165 |
+
num_parallel_calls=tf.data.experimental.AUTOTUNE)
|
166 |
+
dataset = dataset.batch(batch_size, drop_remainder=True)
|
167 |
+
return dataset
|
168 |
+
|
169 |
+
|
170 |
+
def _generate_sharded_filenames(filename: str) -> List[str]:
|
171 |
+
"""Generates filenames of the each file in the sharded filepath.
|
172 |
+
|
173 |
+
Based on github.com/google/revisiting-self-supervised/blob/master/datasets.py.
|
174 |
+
|
175 |
+
Args:
|
176 |
+
filename: The sharded filepath.
|
177 |
+
|
178 |
+
Returns:
|
179 |
+
A list of filepaths for each file in the shard.
|
180 |
+
"""
|
181 |
+
base, count = filename.split('@')
|
182 |
+
count = int(count)
|
183 |
+
return ['{}-{:05d}-of-{:05d}'.format(base, i, count) for i in range(count)]
|
184 |
+
|
185 |
+
|
186 |
+
def _create_from_sharded_tfrecord(batch_size,
|
187 |
+
train_mode,
|
188 |
+
file,
|
189 |
+
augmentation_fns,
|
190 |
+
crop_size,
|
191 |
+
max_examples=-1) -> tf.data.Dataset:
|
192 |
+
"""Creates a dataset from a sharded tfrecord."""
|
193 |
+
dataset = tf.data.Dataset.from_tensor_slices(
|
194 |
+
_generate_sharded_filenames(file))
|
195 |
+
|
196 |
+
# pylint: disable=g-long-lambda
|
197 |
+
dataset = dataset.interleave(
|
198 |
+
lambda x: _create_from_tfrecord(
|
199 |
+
batch_size,
|
200 |
+
file=x,
|
201 |
+
augmentation_fns=augmentation_fns,
|
202 |
+
crop_size=crop_size),
|
203 |
+
num_parallel_calls=tf.data.AUTOTUNE,
|
204 |
+
deterministic=not train_mode)
|
205 |
+
# pylint: enable=g-long-lambda
|
206 |
+
dataset = dataset.prefetch(buffer_size=2)
|
207 |
+
if max_examples > 0:
|
208 |
+
return dataset.take(max_examples)
|
209 |
+
return dataset
|
210 |
+
|
211 |
+
|
212 |
+
@gin.configurable('training_dataset')
|
213 |
+
def create_training_dataset(
|
214 |
+
batch_size: int,
|
215 |
+
file: Optional[str] = None,
|
216 |
+
files: Optional[List[str]] = None,
|
217 |
+
crop_size: int = -1,
|
218 |
+
crop_sizes: Optional[List[int]] = None,
|
219 |
+
augmentation_fns: Optional[Dict[str, Callable[..., tf.Tensor]]] = None
|
220 |
+
) -> tf.data.Dataset:
|
221 |
+
"""Creates the training dataset.
|
222 |
+
|
223 |
+
The given tfrecord should contain data in a format produced by
|
224 |
+
frame_interpolation/datasets/create_*_tfrecord.py
|
225 |
+
|
226 |
+
Args:
|
227 |
+
batch_size: The number of images to batch per example.
|
228 |
+
file: (deprecated) A path to a sharded tfrecord in <tfrecord>@N format.
|
229 |
+
Deprecated. Use 'files' instead.
|
230 |
+
files: A list of paths to sharded tfrecords in <tfrecord>@N format.
|
231 |
+
crop_size: (deprecated) If > 0, images are cropped to crop_size x crop_size
|
232 |
+
using tensorflow's random cropping. Deprecated: use 'files' and
|
233 |
+
'crop_sizes' instead.
|
234 |
+
crop_sizes: List of crop sizes. If > 0, images are cropped to
|
235 |
+
crop_size x crop_size using tensorflow's random cropping.
|
236 |
+
augmentation_fns: A Dict of Callables to data augmentation functions.
|
237 |
+
Returns:
|
238 |
+
A tensorflow dataset for accessing examples that contain the input images
|
239 |
+
'x0', 'x1', ground truth 'y' and time of the ground truth 'time'=[0,1] in a
|
240 |
+
dictionary of tensors.
|
241 |
+
"""
|
242 |
+
if file:
|
243 |
+
logging.warning('gin-configurable training_dataset.file is deprecated. '
|
244 |
+
'Use training_dataset.files instead.')
|
245 |
+
return _create_from_sharded_tfrecord(batch_size, True, file,
|
246 |
+
augmentation_fns, crop_size)
|
247 |
+
else:
|
248 |
+
if not crop_sizes or len(crop_sizes) != len(files):
|
249 |
+
raise ValueError('Please pass crop_sizes[] with training_dataset.files.')
|
250 |
+
if crop_size > 0:
|
251 |
+
raise ValueError(
|
252 |
+
'crop_size should not be used with files[], use crop_sizes[] instead.'
|
253 |
+
)
|
254 |
+
tables = []
|
255 |
+
for file, crop_size in zip(files, crop_sizes):
|
256 |
+
tables.append(
|
257 |
+
_create_from_sharded_tfrecord(batch_size, True, file,
|
258 |
+
augmentation_fns, crop_size))
|
259 |
+
return tf.data.experimental.sample_from_datasets(tables)
|
260 |
+
|
261 |
+
|
262 |
+
@gin.configurable('eval_datasets')
|
263 |
+
def create_eval_datasets(batch_size: int,
|
264 |
+
files: List[str],
|
265 |
+
names: List[str],
|
266 |
+
crop_size: int = -1,
|
267 |
+
max_examples: int = -1) -> Dict[str, tf.data.Dataset]:
|
268 |
+
"""Creates the evaluation datasets.
|
269 |
+
|
270 |
+
As opposed to create_training_dataset this function makes sure that the
|
271 |
+
examples for each dataset are always read in a deterministic (same) order.
|
272 |
+
|
273 |
+
Each given tfrecord should contain data in a format produced by
|
274 |
+
frame_interpolation/datasets/create_*_tfrecord.py
|
275 |
+
|
276 |
+
The (batch_size, crop_size, max_examples) are specified for all eval datasets.
|
277 |
+
|
278 |
+
Args:
|
279 |
+
batch_size: The number of images to batch per example.
|
280 |
+
files: List of paths to a sharded tfrecord in <tfrecord>@N format.
|
281 |
+
names: List of names of eval datasets.
|
282 |
+
crop_size: If > 0, images are cropped to crop_size x crop_size using
|
283 |
+
tensorflow's random cropping.
|
284 |
+
max_examples: If > 0, truncate the dataset to 'max_examples' in length. This
|
285 |
+
can be useful for speeding up evaluation loop in case the tfrecord for the
|
286 |
+
evaluation set is very large.
|
287 |
+
Returns:
|
288 |
+
A dict of name to tensorflow dataset for accessing examples that contain the
|
289 |
+
input images 'x0', 'x1', ground truth 'y' and time of the ground truth
|
290 |
+
'time'=[0,1] in a dictionary of tensors.
|
291 |
+
"""
|
292 |
+
return {
|
293 |
+
name: _create_from_sharded_tfrecord(batch_size, False, file, None,
|
294 |
+
crop_size, max_examples)
|
295 |
+
for name, file in zip(names, files)
|
296 |
+
}
|
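`_generate_sharded_filenames` expands the `<tfrecord>@N` convention used throughout the gin configs into the individual shard paths. A quick illustration with a made-up bucket path:

```python
_generate_sharded_filenames('gs://my-bucket/vimeo_interp_train.tfrecord@3')
# ['gs://my-bucket/vimeo_interp_train.tfrecord-00000-of-00003',
#  'gs://my-bucket/vimeo_interp_train.tfrecord-00001-of-00003',
#  'gs://my-bucket/vimeo_interp_train.tfrecord-00002-of-00003']
```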
training/eval_lib.py
ADDED
@@ -0,0 +1,131 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Evaluation library for frame interpolation."""
|
16 |
+
from typing import Dict, Mapping, Text
|
17 |
+
|
18 |
+
from absl import logging
|
19 |
+
import tensorflow as tf
|
20 |
+
|
21 |
+
|
22 |
+
def _collect_tensors(tensors: tf.Tensor) -> tf.Tensor:
|
23 |
+
"""Collect tensors of the different replicas into a list."""
|
24 |
+
return tf.nest.flatten(tensors, expand_composites=True)
|
25 |
+
|
26 |
+
|
27 |
+
@tf.function
|
28 |
+
def _distributed_eval_step(strategy: tf.distribute.Strategy,
|
29 |
+
batch: Dict[Text, tf.Tensor], model: tf.keras.Model,
|
30 |
+
metrics: Dict[Text, tf.keras.metrics.Metric],
|
31 |
+
checkpoint_step: int) -> Dict[Text, tf.Tensor]:
|
32 |
+
"""Distributed eval step.
|
33 |
+
|
34 |
+
Args:
|
35 |
+
strategy: A Tensorflow distribution strategy.
|
36 |
+
batch: A batch of training examples.
|
37 |
+
model: The Keras model to evaluate.
|
38 |
+
metrics: The Keras metrics used for evaluation (a dictionary).
|
39 |
+
checkpoint_step: The iteration number at which the checkpoint is restored.
|
40 |
+
|
41 |
+
Returns:
|
42 |
+
list of predictions from each replica.
|
43 |
+
"""
|
44 |
+
|
45 |
+
def _eval_step(
|
46 |
+
batch: Dict[Text, tf.Tensor]) -> Dict[Text, tf.Tensor]:
|
47 |
+
"""Eval for one step."""
|
48 |
+
predictions = model(batch, training=False)
|
49 |
+
# Note: these metrics expect batch and prediction dictionaries rather than
|
50 |
+
# tensors like standard TF metrics do. This allows our losses and metrics to
|
51 |
+
# use a richer set of inputs than just the predicted final image.
|
52 |
+
for metric in metrics.values():
|
53 |
+
metric.update_state(batch, predictions, checkpoint_step=checkpoint_step)
|
54 |
+
return predictions
|
55 |
+
|
56 |
+
return strategy.run(_eval_step, args=(batch,))
|
57 |
+
|
58 |
+
|
59 |
+
def _summarize_image_tensors(combined, prefix, step):
|
60 |
+
for name in combined:
|
61 |
+
image = combined[name]
|
62 |
+
if isinstance(image, tf.Tensor):
|
63 |
+
if len(image.shape) == 4 and (image.shape[-1] == 1 or
|
64 |
+
image.shape[-1] == 3):
|
65 |
+
tf.summary.image(prefix + '/' + name, image, step=step)
|
66 |
+
|
67 |
+
|
68 |
+
def eval_loop(strategy: tf.distribute.Strategy,
|
69 |
+
eval_base_folder: str,
|
70 |
+
model: tf.keras.Model,
|
71 |
+
metrics: Dict[str, tf.keras.metrics.Metric],
|
72 |
+
datasets: Mapping[str, tf.data.Dataset],
|
73 |
+
summary_writer: tf.summary.SummaryWriter,
|
74 |
+
checkpoint_step: int):
|
75 |
+
"""Eval function that is strategy agnostic.
|
76 |
+
|
77 |
+
Args:
|
78 |
+
strategy: A Tensorflow distributed strategy.
|
79 |
+
eval_base_folder: A path to where the summaries event files and
|
80 |
+
checkpoints will be saved.
|
81 |
+
model: A function that returns the model.
|
82 |
+
metrics: A function that returns the metrics dictionary.
|
83 |
+
datasets: A dict of tf.data.Dataset to evaluate on.
|
84 |
+
summary_writer: Eval summary writer.
|
85 |
+
checkpoint_step: The number of iterations completed.
|
86 |
+
"""
|
87 |
+
logging.info('Saving eval summaries to: %s...', eval_base_folder)
|
88 |
+
summary_writer.set_as_default()
|
89 |
+
|
90 |
+
for dataset_name, dataset in datasets.items():
|
91 |
+
for metric in metrics.values():
|
92 |
+
metric.reset_states()
|
93 |
+
|
94 |
+
logging.info('Loading %s testing data ...', dataset_name)
|
95 |
+
dataset = strategy.experimental_distribute_dataset(dataset)
|
96 |
+
|
97 |
+
logging.info('Evaluating %s ...', dataset_name)
|
98 |
+
batch_idx = 0
|
99 |
+
max_batches_to_summarize = 10
|
100 |
+
for batch in dataset:
|
101 |
+
predictions = _distributed_eval_step(strategy, batch, model, metrics,
|
102 |
+
checkpoint_step)
|
103 |
+
# Clip interpolator output to [0,1]. Clipping is done only
|
104 |
+
# on the eval loop to get better metrics, but not on the training loop
|
105 |
+
# so gradients are not killed.
|
106 |
+
if strategy.num_replicas_in_sync > 1:
|
107 |
+
predictions = {
|
108 |
+
'image': tf.concat(predictions['image'].values, axis=0)
|
109 |
+
}
|
110 |
+
predictions['image'] = tf.clip_by_value(predictions['image'], 0., 1.)
|
111 |
+
if batch_idx % 10 == 0:
|
112 |
+
logging.info('Evaluating batch %s', batch_idx)
|
113 |
+
batch_idx = batch_idx + 1
|
114 |
+
if batch_idx < max_batches_to_summarize:
|
115 |
+
# Loop through the global batch:
|
116 |
+
prefix = f'{dataset_name}/eval_{batch_idx}'
|
117 |
+
# Find all tensors that look like images, and summarize:
|
118 |
+
combined = {**batch, **predictions}
|
119 |
+
_summarize_image_tensors(combined, prefix, step=checkpoint_step)
|
120 |
+
|
121 |
+
elif batch_idx == max_batches_to_summarize:
|
122 |
+
tf.summary.flush()
|
123 |
+
|
124 |
+
for name, metric in metrics.items():
|
125 |
+
tf.summary.scalar(
|
126 |
+
f'{dataset_name}/{name}', metric.result(), step=checkpoint_step)
|
127 |
+
tf.summary.flush()
|
128 |
+
logging.info('Step {:2}, {} {}'.format(checkpoint_step,
|
129 |
+
f'{dataset_name}/{name}',
|
130 |
+
metric.result().numpy()))
|
131 |
+
metric.reset_states()
|
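`eval_loop` is normally driven from `train_lib` at every checkpoint, but it can also be wired up by hand. A rough sketch, assuming one of the gin configs above has already been parsed (so the `model`, `eval_datasets` and loss bindings exist) and that the sibling modules are importable; the folder paths here are placeholders:

```python
import tensorflow as tf
from frame_interpolation.training import data_lib, eval_lib, metrics_lib, model_lib

strategy = tf.distribute.OneDeviceStrategy('/cpu:0')
with strategy.scope():
  model = model_lib.create_model()            # model.name supplied by gin
  metrics = metrics_lib.create_metrics_fn()

datasets = data_lib.create_eval_datasets()    # eval_datasets.* supplied by gin
writer = tf.summary.create_file_writer('/tmp/eval')

eval_lib.eval_loop(
    strategy=strategy,
    eval_base_folder='/tmp/eval',
    model=model,
    metrics=metrics,
    datasets=datasets,
    summary_writer=writer,
    checkpoint_step=0)
```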
training/metrics_lib.py
ADDED
@@ -0,0 +1,142 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""A library for instantiating frame interpolation evaluation metrics."""
|
16 |
+
|
17 |
+
from typing import Callable, Dict, Text
|
18 |
+
|
19 |
+
from ..losses import losses
|
20 |
+
import tensorflow as tf
|
21 |
+
|
22 |
+
|
23 |
+
class TrainLossMetric(tf.keras.metrics.Metric):
|
24 |
+
"""Compute training loss for our example and prediction format.
|
25 |
+
|
26 |
+
The purpose of this is to ensure that we always include a loss that is exactly
|
27 |
+
like the training loss in the evaluation, in order to detect possible
|
28 |
+
overfitting.
|
29 |
+
"""
|
30 |
+
|
31 |
+
def __init__(self, name='eval_loss', **kwargs):
|
32 |
+
super(TrainLossMetric, self).__init__(name=name, **kwargs)
|
33 |
+
self.acc = self.add_weight(name='train_metric_acc', initializer='zeros')
|
34 |
+
self.count = self.add_weight(name='train_metric_count', initializer='zeros')
|
35 |
+
|
36 |
+
def update_state(self,
|
37 |
+
batch,
|
38 |
+
predictions,
|
39 |
+
sample_weight=None,
|
40 |
+
checkpoint_step=0):
|
41 |
+
loss_functions = losses.training_losses()
|
42 |
+
loss_list = []
|
43 |
+
for (loss_value, loss_weight) in loss_functions.values():
|
44 |
+
loss_list.append(
|
45 |
+
loss_value(batch, predictions) * loss_weight(checkpoint_step))
|
46 |
+
loss = tf.add_n(loss_list)
|
47 |
+
self.acc.assign_add(loss)
|
48 |
+
self.count.assign_add(1)
|
49 |
+
|
50 |
+
def result(self):
|
51 |
+
return self.acc / self.count
|
52 |
+
|
53 |
+
def reset_states(self):
|
54 |
+
self.acc.assign(0)
|
55 |
+
self.count.assign(0)
|
56 |
+
|
57 |
+
|
58 |
+
class L1Metric(tf.keras.metrics.Metric):
|
59 |
+
"""Compute L1 over our training example and prediction format.
|
60 |
+
|
61 |
+
The purpose of this is to ensure that we have at least one metric that is
|
62 |
+
compatible across all eval sessions and allows us to quickly compare models
|
63 |
+
against each other.
|
64 |
+
"""
|
65 |
+
|
66 |
+
def __init__(self, name='eval_loss', **kwargs):
|
67 |
+
super(L1Metric, self).__init__(name=name, **kwargs)
|
68 |
+
self.acc = self.add_weight(name='l1_metric_acc', initializer='zeros')
|
69 |
+
self.count = self.add_weight(name='l1_metric_count', initializer='zeros')
|
70 |
+
|
71 |
+
def update_state(self, batch, prediction, sample_weight=None,
|
72 |
+
checkpoint_step=0):
|
73 |
+
self.acc.assign_add(losses.l1_loss(batch, prediction))
|
74 |
+
self.count.assign_add(1)
|
75 |
+
|
76 |
+
def result(self):
|
77 |
+
return self.acc / self.count
|
78 |
+
|
79 |
+
def reset_states(self):
|
80 |
+
self.acc.assign(0)
|
81 |
+
self.count.assign(0)
|
82 |
+
|
83 |
+
|
84 |
+
class GenericLossMetric(tf.keras.metrics.Metric):
|
85 |
+
"""Metric based on any loss function."""
|
86 |
+
|
87 |
+
def __init__(self, name: str, loss: Callable[..., tf.Tensor],
|
88 |
+
weight: Callable[..., tf.Tensor], **kwargs):
|
89 |
+
"""Initializes a metric based on a loss function and a weight schedule.
|
90 |
+
|
91 |
+
Args:
|
92 |
+
name: The name of the metric.
|
93 |
+
loss: The callable loss that calculates a loss value for a (prediction,
|
94 |
+
target) pair.
|
95 |
+
weight: The callable weight scheduling function that samples a weight
|
96 |
+
based on iteration.
|
97 |
+
**kwargs: Any additional keyword arguments to be passed.
|
98 |
+
"""
|
99 |
+
super(GenericLossMetric, self).__init__(name=name, **kwargs)
|
100 |
+
self.acc = self.add_weight(name='loss_metric_acc', initializer='zeros')
|
101 |
+
self.count = self.add_weight(name='loss_metric_count', initializer='zeros')
|
102 |
+
self.loss = loss
|
103 |
+
self.weight = weight
|
104 |
+
|
105 |
+
def update_state(self,
|
106 |
+
batch,
|
107 |
+
predictions,
|
108 |
+
sample_weight=None,
|
109 |
+
checkpoint_step=0):
|
110 |
+
self.acc.assign_add(
|
111 |
+
self.loss(batch, predictions) * self.weight(checkpoint_step))
|
112 |
+
self.count.assign_add(1)
|
113 |
+
|
114 |
+
def result(self):
|
115 |
+
return self.acc / self.count
|
116 |
+
|
117 |
+
def reset_states(self):
|
118 |
+
self.acc.assign(0)
|
119 |
+
self.count.assign(0)
|
120 |
+
|
121 |
+
|
122 |
+
def create_metrics_fn() -> Dict[Text, tf.keras.metrics.Metric]:
|
123 |
+
"""Create evaluation metrics.
|
124 |
+
|
125 |
+
L1 and total training loss are added by default.
|
126 |
+
The rest are configured by the test_losses item via gin.
|
127 |
+
|
128 |
+
Returns:
|
129 |
+
A dictionary from metric name to Keras Metric object.
|
130 |
+
"""
|
131 |
+
metrics = {}
|
132 |
+
# L1 is explicitly added just so we always have some consistent numbers around
|
133 |
+
# to compare across sessions.
|
134 |
+
metrics['l1'] = L1Metric()
|
135 |
+
# We also always include training loss for the eval set to detect overfitting:
|
136 |
+
metrics['training_loss'] = TrainLossMetric()
|
137 |
+
|
138 |
+
test_losses = losses.test_losses()
|
139 |
+
for loss_name, (loss_value, loss_weight) in test_losses.items():
|
140 |
+
metrics[loss_name] = GenericLossMetric(
|
141 |
+
name=loss_name, loss=loss_value, weight=loss_weight)
|
142 |
+
return metrics
|
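All of these metrics follow the same contract: `update_state` takes the batch and prediction dictionaries (plus the checkpoint step, so weight schedules can be sampled) and `result()` returns the running mean over however many batches were accumulated. A hypothetical evaluation snippet using them, where `model` and `eval_dataset` stand in for objects created elsewhere:

```python
CHECKPOINT_STEP = 100_000  # hypothetical training iteration being evaluated

metrics = create_metrics_fn()
for batch in eval_dataset:                      # hypothetical tf.data.Dataset
  predictions = model(batch, training=False)    # hypothetical tf.keras.Model
  for metric in metrics.values():
    metric.update_state(batch, predictions, checkpoint_step=CHECKPOINT_STEP)

for name, metric in metrics.items():
  print(name, float(metric.result()))
  metric.reset_states()
```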
training/model_lib.py
ADDED
@@ -0,0 +1,53 @@
# Copyright 2022 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A library for instantiating the model for training frame interpolation.

All models are expected to use three inputs: input image batches 'x0' and 'x1'
and 'time', the fractional time where the output should be generated.

The models are expected to output the prediction as a dictionary that contains
at least the predicted image batch as 'image' plus optional data for debug,
analysis or custom losses.
"""

import gin.tf
from ..models.film_net import interpolator as film_net_interpolator
from ..models.film_net import options as film_net_options

import tensorflow as tf


@gin.configurable('model')
def create_model(name: str) -> tf.keras.Model:
  """Creates the frame interpolation model based on given model name."""
  if name == 'film_net':
    return _create_film_net_model()  # pylint: disable=no-value-for-parameter
  else:
    raise ValueError(f'Model {name} not implemented.')


def _create_film_net_model() -> tf.keras.Model:
  """Creates the film_net interpolator."""
  # Options are gin-configured in the Options class directly.
  options = film_net_options.Options()

  x0 = tf.keras.Input(
      shape=(None, None, 3), batch_size=None, dtype=tf.float32, name='x0')
  x1 = tf.keras.Input(
      shape=(None, None, 3), batch_size=None, dtype=tf.float32, name='x1')
  time = tf.keras.Input(
      shape=(1,), batch_size=None, dtype=tf.float32, name='time')

  return film_net_interpolator.create_model(x0, x1, time, options)
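Because `create_model` is gin-configured, callers never pass `name` directly. A rough usage sketch, assuming one of the `film_net-*.gin` configs above has been parsed (as `train.py` does), so that `model.name` and the `film_net.*` options are all bound; the input sizes are arbitrary but must be divisible by the pyramid stride:

```python
import numpy as np

model = create_model()  # 'name' is injected by the gin binding model.name

x0 = np.zeros((1, 256, 256, 3), np.float32)
x1 = np.zeros((1, 256, 256, 3), np.float32)
time = np.full((1, 1), 0.5, np.float32)

# The model consumes a dict keyed by its input names and returns a dict that
# contains at least the interpolated frame under 'image'.
prediction = model({'x0': x0, 'x1': x1, 'time': time}, training=False)
print(prediction['image'].shape)  # (1, 256, 256, 3)
```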
training/train.py
ADDED
@@ -0,0 +1,131 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
r"""The training loop for frame interpolation.
|
16 |
+
|
17 |
+
gin_config: The gin configuration file containing model, losses and datasets.
|
18 |
+
|
19 |
+
To run on GPUs:
|
20 |
+
python3 -m frame_interpolation.training.train \
|
21 |
+
--gin_config <path to network.gin> \
|
22 |
+
--base_folder <base folder for all training runs> \
|
23 |
+
--label <descriptive label for the run>
|
24 |
+
|
25 |
+
To debug the training loop on CPU:
|
26 |
+
python3 -m frame_interpolation.training.train \
|
27 |
+
--gin_config <path to config.gin> \
|
28 |
+
--base_folder /tmp
|
29 |
+
--label test_run \
|
30 |
+
--mode cpu
|
31 |
+
|
32 |
+
The training output directory will be created at <base_folder>/<label>.
|
33 |
+
"""
|
34 |
+
import os
|
35 |
+
|
36 |
+
from . import augmentation_lib
|
37 |
+
from . import data_lib
|
38 |
+
from . import eval_lib
|
39 |
+
from . import metrics_lib
|
40 |
+
from . import model_lib
|
41 |
+
from . import train_lib
|
42 |
+
from absl import app
|
43 |
+
from absl import flags
|
44 |
+
from absl import logging
|
45 |
+
import gin.tf
|
46 |
+
from ..losses import losses
|
47 |
+
|
48 |
+
# Reduce tensorflow logs to ERRORs only.
|
49 |
+
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
|
50 |
+
import tensorflow as tf # pylint: disable=g-import-not-at-top
|
51 |
+
tf.get_logger().setLevel('ERROR')
|
52 |
+
|
53 |
+
|
54 |
+
_GIN_CONFIG = flags.DEFINE_string('gin_config', None, 'Gin config file.')
|
55 |
+
_LABEL = flags.DEFINE_string('label', 'run0',
|
56 |
+
'Descriptive label for this run.')
|
57 |
+
_BASE_FOLDER = flags.DEFINE_string('base_folder', None,
|
58 |
+
'Path to checkpoints/summaries.')
|
59 |
+
_MODE = flags.DEFINE_enum('mode', 'gpu', ['cpu', 'gpu'],
|
60 |
+
'Distributed strategy approach.')
|
61 |
+
|
62 |
+
|
63 |
+
@gin.configurable('training')
|
64 |
+
class TrainingOptions(object):
|
65 |
+
"""Training-related options."""
|
66 |
+
|
67 |
+
def __init__(self, learning_rate: float, learning_rate_decay_steps: int,
|
68 |
+
learning_rate_decay_rate: float, learning_rate_staircase: bool,
|
69 |
+
num_steps: int):
|
70 |
+
self.learning_rate = learning_rate
|
71 |
+
self.learning_rate_decay_steps = learning_rate_decay_steps
|
72 |
+
self.learning_rate_decay_rate = learning_rate_decay_rate
|
73 |
+
self.learning_rate_staircase = learning_rate_staircase
|
74 |
+
self.num_steps = num_steps
|
75 |
+
|
76 |
+
|
77 |
+
def main(argv):
|
78 |
+
if len(argv) > 1:
|
79 |
+
raise app.UsageError('Too many command-line arguments.')
|
80 |
+
|
81 |
+
output_dir = os.path.join(_BASE_FOLDER.value, _LABEL.value)
|
82 |
+
logging.info('Creating output_dir @ %s ...', output_dir)
|
83 |
+
|
84 |
+
# Copy config file to <base_folder>/<label>/config.gin.
|
85 |
+
tf.io.gfile.makedirs(output_dir)
|
86 |
+
tf.io.gfile.copy(
|
87 |
+
_GIN_CONFIG.value, os.path.join(output_dir, 'config.gin'), overwrite=True)
|
88 |
+
|
89 |
+
gin.external_configurable(
|
90 |
+
tf.keras.optimizers.schedules.PiecewiseConstantDecay,
|
91 |
+
module='tf.keras.optimizers.schedules')
|
92 |
+
|
93 |
+
gin_configs = [_GIN_CONFIG.value]
|
94 |
+
gin.parse_config_files_and_bindings(
|
95 |
+
config_files=gin_configs, bindings=None, skip_unknown=True)
|
96 |
+
|
97 |
+
training_options = TrainingOptions() # pylint: disable=no-value-for-parameter
|
98 |
+
|
99 |
+
learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
|
100 |
+
training_options.learning_rate,
|
101 |
+
training_options.learning_rate_decay_steps,
|
102 |
+
training_options.learning_rate_decay_rate,
|
103 |
+
training_options.learning_rate_staircase,
|
104 |
+
name='learning_rate')
|
105 |
+
|
106 |
+
# Initialize data augmentation functions
|
107 |
+
augmentation_fns = augmentation_lib.data_augmentations()
|
108 |
+
|
109 |
+
saved_model_folder = os.path.join(_BASE_FOLDER.value, _LABEL.value,
|
110 |
+
'saved_model')
|
111 |
+
train_folder = os.path.join(_BASE_FOLDER.value, _LABEL.value, 'train')
|
112 |
+
eval_folder = os.path.join(_BASE_FOLDER.value, _LABEL.value, 'eval')
|
113 |
+
|
114 |
+
train_lib.train(
|
115 |
+
strategy=train_lib.get_strategy(_MODE.value),
|
116 |
+
train_folder=train_folder,
|
117 |
+
saved_model_folder=saved_model_folder,
|
118 |
+
n_iterations=training_options.num_steps,
|
119 |
+
create_model_fn=model_lib.create_model,
|
120 |
+
create_losses_fn=losses.training_losses,
|
121 |
+
create_metrics_fn=metrics_lib.create_metrics_fn,
|
122 |
+
dataset=data_lib.create_training_dataset(
|
123 |
+
augmentation_fns=augmentation_fns),
|
124 |
+
learning_rate=learning_rate,
|
125 |
+
eval_loop_fn=eval_lib.eval_loop,
|
126 |
+
eval_folder=eval_folder,
|
127 |
+
eval_datasets=data_lib.create_eval_datasets() or None)
|
128 |
+
|
129 |
+
|
130 |
+
if __name__ == '__main__':
|
131 |
+
app.run(main)
|
training/train_lib.py
ADDED
@@ -0,0 +1,343 @@
1 |
+
# Copyright 2022 Google LLC
|
2 |
+
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
r"""Training library for frame interpolation using distributed strategy."""
|
16 |
+
import functools
|
17 |
+
from typing import Any, Callable, Dict, Text, Tuple
|
18 |
+
|
19 |
+
from absl import logging
|
20 |
+
import tensorflow as tf
|
21 |
+
|
22 |
+
|
23 |
+
def _concat_tensors(tensors: tf.Tensor) -> tf.Tensor:
|
24 |
+
"""Concat tensors of the different replicas."""
|
25 |
+
return tf.concat(tf.nest.flatten(tensors, expand_composites=True), axis=0)
|
26 |
+
|
27 |
+
|
28 |
+
@tf.function
|
29 |
+
def _distributed_train_step(strategy: tf.distribute.Strategy,
|
30 |
+
batch: Dict[Text, tf.Tensor], model: tf.keras.Model,
|
31 |
+
loss_functions: Dict[Text,
|
32 |
+
Tuple[Callable[..., tf.Tensor],
|
33 |
+
Callable[...,
|
34 |
+
tf.Tensor]]],
|
35 |
+
optimizer: tf.keras.optimizers.Optimizer,
|
36 |
+
iterations: int) -> Dict[Text, Any]:
|
37 |
+
"""Distributed training step.
|
38 |
+
|
39 |
+
Args:
|
40 |
+
strategy: A Tensorflow distribution strategy.
|
41 |
+
batch: A batch of training examples.
|
42 |
+
model: The Keras model to train.
|
43 |
+
loss_functions: The list of Keras losses used to train the model.
|
44 |
+
optimizer: The Keras optimizer used to train the model.
|
45 |
+
iterations: Iteration number used to sample weights to each loss.
|
46 |
+
|
47 |
+
Returns:
|
48 |
+
A dictionary of train step outputs.
|
49 |
+
"""
|
50 |
+
|
51 |
+
def _train_step(batch: Dict[Text, tf.Tensor]) -> Dict[Text, tf.Tensor]:
|
52 |
+
"""Train for one step."""
|
53 |
+
with tf.GradientTape() as tape:
|
54 |
+
predictions = model(batch, training=True)
|
55 |
+
losses = []
|
56 |
+
for (loss_value, loss_weight) in loss_functions.values():
|
57 |
+
losses.append(loss_value(batch, predictions) * loss_weight(iterations))
|
58 |
+
loss = tf.add_n(losses)
|
59 |
+
grads = tape.gradient(loss, model.trainable_variables)
|
60 |
+
optimizer.apply_gradients(zip(grads, model.trainable_variables))
|
61 |
+
# post process for visualization
|
62 |
+
all_data = {'loss': loss}
|
63 |
+
all_data.update(batch)
|
64 |
+
all_data.update(predictions)
|
65 |
+
return all_data
|
66 |
+
|
67 |
+
step_outputs = strategy.run(_train_step, args=(batch,))
|
68 |
+
|
69 |
+
loss = strategy.reduce(
|
70 |
+
tf.distribute.ReduceOp.MEAN, step_outputs['loss'], axis=None)
|
71 |
+
|
72 |
+
x0 = _concat_tensors(step_outputs['x0'])
|
73 |
+
x1 = _concat_tensors(step_outputs['x1'])
|
74 |
+
y = _concat_tensors(step_outputs['y'])
|
75 |
+
pred_y = _concat_tensors(step_outputs['image'])
|
76 |
+
|
77 |
+
scalar_summaries = {'training_loss': loss}
|
78 |
+
|
79 |
+
image_summaries = {
|
80 |
+
'x0': x0,
|
81 |
+
'x1': x1,
|
82 |
+
'y': y,
|
83 |
+
'pred_y': pred_y
|
84 |
+
}
|
85 |
+
|
86 |
+
extra_images = {
|
87 |
+
'importance0', 'importance1', 'x0_warped', 'x1_warped', 'fg_image',
|
88 |
+
'bg_image', 'fg_alpha', 'x1_unfiltered_warped'
|
89 |
+
}
|
90 |
+
for image in extra_images:
|
91 |
+
if image in step_outputs:
|
92 |
+
image_summaries[image] = _concat_tensors(step_outputs[image])
|
93 |
+
|
94 |
+
return {
|
95 |
+
'loss': loss,
|
96 |
+
'scalar_summaries': scalar_summaries,
|
97 |
+
'image_summaries': {
|
98 |
+
f'training/{name}': value for name, value in image_summaries.items()
|
99 |
+
}
|
100 |
+
}
|
101 |
+
|
102 |
+
|
103 |
+
def _summary_writer(summaries_dict: Dict[Text, Any]) -> None:
|
104 |
+
"""Adds scalar and image summaries."""
|
105 |
+
# Adds scalar summaries.
|
106 |
+
for key, scalars in summaries_dict['scalar_summaries'].items():
|
107 |
+
tf.summary.scalar(key, scalars)
|
108 |
+
# Adds image summaries.
|
109 |
+
for key, images in summaries_dict['image_summaries'].items():
|
110 |
+
tf.summary.image(key, tf.clip_by_value(images, 0.0, 1.0))
|
111 |
+
tf.summary.histogram(key + '_h', images)
|
112 |
+
|
113 |
+
|
114 |
+
def train_loop(
|
115 |
+
strategy: tf.distribute.Strategy,
|
116 |
+
train_set: tf.data.Dataset,
|
117 |
+
create_model_fn: Callable[..., tf.keras.Model],
|
118 |
+
create_losses_fn: Callable[..., Dict[str, Tuple[Callable[..., tf.Tensor],
|
119 |
+
Callable[..., tf.Tensor]]]],
|
120 |
+
create_optimizer_fn: Callable[..., tf.keras.optimizers.Optimizer],
|
121 |
+
distributed_train_step_fn: Callable[[
|
122 |
+
tf.distribute.Strategy, Dict[str, tf.Tensor], tf.keras.Model, Dict[
|
123 |
+
str,
|
124 |
+
Tuple[Callable[..., tf.Tensor],
|
125 |
+
Callable[..., tf.Tensor]]], tf.keras.optimizers.Optimizer, int
|
126 |
+
], Dict[str, Any]],
|
127 |
+
eval_loop_fn: Callable[..., None],
|
128 |
+
create_metrics_fn: Callable[..., Dict[str, tf.keras.metrics.Metric]],
|
129 |
+
eval_folder: Dict[str, Any],
|
130 |
+
eval_datasets: Dict[str, tf.data.Dataset],
|
131 |
+
summary_writer_fn: Callable[[Dict[str, Any]], None],
|
132 |
+
train_folder: str,
|
133 |
+
saved_model_folder: str,
|
134 |
+
num_iterations: int,
|
135 |
+
save_summaries_frequency: int = 500,
|
136 |
+
save_checkpoint_frequency: int = 500,
|
137 |
+
checkpoint_max_to_keep: int = 10,
|
138 |
+
checkpoint_save_every_n_hours: float = 2.,
|
139 |
+
timing_frequency: int = 100,
|
140 |
+
logging_frequency: int = 10):
|
141 |
+
"""A Tensorflow 2 eager mode training loop.
|
142 |
+
|
143 |
+
Args:
|
144 |
+
strategy: A Tensorflow distributed strategy.
|
145 |
+
train_set: A tf.data.Dataset to loop through for training.
|
146 |
+
create_model_fn: A callable that returns a tf.keras.Model.
|
147 |
+
create_losses_fn: A callable that returns a tf.keras.losses.Loss.
|
148 |
+
create_optimizer_fn: A callable that returns a
|
149 |
+
tf.keras.optimizers.Optimizer.
|
150 |
+
distributed_train_step_fn: A callable that takes a distribution strategy, a
|
151 |
+
Dict[Text, tf.Tensor] holding the batch of training data, a
|
152 |
+
tf.keras.Model, a tf.keras.losses.Loss, a tf.keras.optimizers.Optimizer,
|
153 |
+
iteration number used to sample a weight value for the loss functions,
|
154 |
+
and returns a dictionary to be passed to the summary_writer_fn.
|
155 |
+
eval_loop_fn: Eval loop function.
|
156 |
+
create_metrics_fn: create_metric_fn.
|
157 |
+
eval_folder: A path to where the summary event files and checkpoints will be
|
158 |
+
saved.
|
159 |
+
eval_datasets: A dictionary of evaluation tf.data.Dataset to loop through for
|
160 |
+
evaluation.
|
161 |
+
summary_writer_fn: A callable that takes the output of
|
162 |
+
distributed_train_step_fn and writes summaries to be visualized in
|
163 |
+
TensorBoard.
|
164 |
+
train_folder: A path to where the summaries event files and checkpoints
|
165 |
+
will be saved.
|
166 |
+
saved_model_folder: A path to where the saved models are stored.
|
167 |
+
num_iterations: An integer, the number of iterations to train for.
|
168 |
+
save_summaries_frequency: The iteration frequency with which summaries are
|
169 |
+
saved.
|
170 |
+
save_checkpoint_frequency: The iteration frequency with which model
|
171 |
+
checkpoints are saved.
|
172 |
+
checkpoint_max_to_keep: The maximum number of checkpoints to keep.
|
173 |
+
checkpoint_save_every_n_hours: The frequency in hours to keep checkpoints.
|
174 |
+
timing_frequency: The iteration frequency with which to log timing.
|
175 |
+
logging_frequency: How often to output with logging.info().
|
176 |
+
"""
|
177 |
+
logging.info('Creating training tensorboard summaries ...')
|
178 |
+
summary_writer = tf.summary.create_file_writer(train_folder)
|
179 |
+
|
180 |
+
if eval_datasets is not None:
|
181 |
+
logging.info('Creating eval tensorboard summaries ...')
|
182 |
+
eval_summary_writer = tf.summary.create_file_writer(eval_folder)
|
183 |
+
|
184 |
+
train_set = strategy.experimental_distribute_dataset(train_set)
|
185 |
+
with strategy.scope():
|
186 |
+
logging.info('Building model ...')
|
187 |
+
model = create_model_fn()
|
188 |
+
loss_functions = create_losses_fn()
|
189 |
+
optimizer = create_optimizer_fn()
|
190 |
+
if eval_datasets is not None:
|
191 |
+
metrics = create_metrics_fn()
|
192 |
+
|
193 |
+
logging.info('Creating checkpoint ...')
|
194 |
+
checkpoint = tf.train.Checkpoint(
|
195 |
+
model=model,
|
196 |
+
optimizer=optimizer,
|
197 |
+
step=optimizer.iterations,
|
198 |
+
epoch=tf.Variable(0, dtype=tf.int64, trainable=False),
|
199 |
+
training_finished=tf.Variable(False, dtype=tf.bool, trainable=False))
|
200 |
+
|
201 |
+
logging.info('Restoring old model (if exists) ...')
|
202 |
+
checkpoint_manager = tf.train.CheckpointManager(
|
203 |
+
checkpoint,
|
204 |
+
directory=train_folder,
|
205 |
+
max_to_keep=checkpoint_max_to_keep,
|
206 |
+
keep_checkpoint_every_n_hours=checkpoint_save_every_n_hours)
|
207 |
+
|
208 |
+
with strategy.scope():
|
209 |
+
if checkpoint_manager.latest_checkpoint:
|
210 |
+
checkpoint.restore(checkpoint_manager.latest_checkpoint)
|
211 |
+
|
212 |
+
logging.info('Creating Timer ...')
|
213 |
+
timer = tf.estimator.SecondOrStepTimer(every_steps=timing_frequency)
|
214 |
+
timer.update_last_triggered_step(optimizer.iterations.numpy())
|
215 |
+
|
216 |
+
logging.info('Training on devices: %s.', [
|
217 |
+
el.name.split('/physical_device:')[-1]
|
218 |
+
for el in tf.config.get_visible_devices()
|
219 |
+
])
|
220 |
+
|
221 |
+
# Re-assign training_finished=False, in case we restored a checkpoint.
|
222 |
+
checkpoint.training_finished.assign(False)
|
223 |
+
while optimizer.iterations.numpy() < num_iterations:
|
224 |
+
for i_batch, batch in enumerate(train_set):
|
225 |
+
summary_writer.set_as_default()
|
226 |
+
iterations = optimizer.iterations.numpy()
|
227 |
+
|
228 |
+
if iterations % logging_frequency == 0:
|
229 |
+
# Log epoch, total iterations and batch index.
|
230 |
+
logging.info('epoch %d; iterations %d; i_batch %d',
|
231 |
+
checkpoint.epoch.numpy(), iterations,
|
232 |
+
i_batch)
|
233 |
+
|
234 |
+
# Break if the number of iterations exceeds the max.
|
235 |
+
if iterations >= num_iterations:
|
236 |
+
break
|
237 |
+
|
238 |
+
# Compute distributed step outputs.
|
239 |
+
distributed_step_outputs = distributed_train_step_fn(
|
240 |
+
strategy, batch, model, loss_functions, optimizer, iterations)
|
241 |
+
|
242 |
+
# Save checkpoint, and optionally run the eval loops.
|
243 |
+
if iterations % save_checkpoint_frequency == 0:
|
244 |
+
checkpoint_manager.save(checkpoint_number=iterations)
|
245 |
+
if eval_datasets is not None:
|
246 |
+
eval_loop_fn(
|
247 |
+
strategy=strategy,
|
248 |
+
eval_base_folder=eval_folder,
|
249 |
+
model=model,
|
250 |
+
metrics=metrics,
|
251 |
+
datasets=eval_datasets,
|
252 |
+
summary_writer=eval_summary_writer,
|
253 |
+
checkpoint_step=iterations)
|
254 |
+
|
255 |
+
# Write summaries.
|
256 |
+
if iterations % save_summaries_frequency == 0:
|
257 |
+
tf.summary.experimental.set_step(step=iterations)
|
258 |
+
summary_writer_fn(distributed_step_outputs)
|
259 |
+
tf.summary.scalar('learning_rate',
|
260 |
+
optimizer.learning_rate(iterations).numpy())
|
261 |
+
|
262 |
+
# Log steps/sec.
|
263 |
+
if timer.should_trigger_for_step(iterations):
|
264 |
+
elapsed_time, elapsed_steps = timer.update_last_triggered_step(
|
265 |
+
iterations)
|
266 |
+
if elapsed_time is not None:
|
267 |
+
steps_per_second = elapsed_steps / elapsed_time
|
268 |
+
tf.summary.scalar(
|
269 |
+
'steps/sec', steps_per_second, step=optimizer.iterations)
|
270 |
+
|
271 |
+
# Increment epoch.
|
272 |
+
checkpoint.epoch.assign_add(1)
|
273 |
+
|
274 |
+
# Assign training_finished variable to True after training is finished and
|
275 |
+
# save the last checkpoint.
|
276 |
+
checkpoint.training_finished.assign(True)
|
277 |
+
checkpoint_manager.save(checkpoint_number=optimizer.iterations.numpy())
|
278 |
+
|
279 |
+
# Generate a saved model.
|
280 |
+
model.save(saved_model_folder)
|
281 |
+
|
282 |
+
|
283 |
+
def train(strategy: tf.distribute.Strategy, train_folder: str,
|
284 |
+
saved_model_folder: str, n_iterations: int,
|
285 |
+
create_model_fn: Callable[..., tf.keras.Model],
|
286 |
+
create_losses_fn: Callable[..., Dict[str,
|
287 |
+
Tuple[Callable[..., tf.Tensor],
|
288 |
+
Callable[...,
|
289 |
+
tf.Tensor]]]],
|
290 |
+
create_metrics_fn: Callable[..., Dict[str, tf.keras.metrics.Metric]],
|
291 |
+
dataset: tf.data.Dataset,
|
292 |
+
learning_rate: tf.keras.optimizers.schedules.LearningRateSchedule,
|
293 |
+
eval_loop_fn: Callable[..., None],
|
294 |
+
eval_folder: str,
|
295 |
+
eval_datasets: Dict[str, tf.data.Dataset]):
|
296 |
+
"""Training function that is strategy agnostic.
|
297 |
+
|
298 |
+
Args:
|
299 |
+
strategy: A Tensorflow distributed strategy.
|
300 |
+
train_folder: A path to where the summaries event files and checkpoints
|
301 |
+
will be saved.
|
302 |
+
saved_model_folder: A path to where the saved models are stored.
|
303 |
+
n_iterations: An integer, the number of iterations to train for.
|
304 |
+
create_model_fn: A callable that returns tf.keras.Model.
|
305 |
+
create_losses_fn: A callable that returns the losses.
|
306 |
+
create_metrics_fn: A function that returns the metrics dictionary.
|
307 |
+
dataset: The tensorflow dataset object.
|
308 |
+
learning_rate: Keras learning rate schedule object.
|
309 |
+
eval_loop_fn: eval loop function.
|
310 |
+
eval_folder: A path to where eval summaries event files and checkpoints
|
311 |
+
will be saved.
|
312 |
+
eval_datasets: The tensorflow evaluation dataset objects.
|
313 |
+
"""
|
314 |
+
train_loop(
|
315 |
+
strategy=strategy,
|
316 |
+
train_set=dataset,
|
317 |
+
create_model_fn=create_model_fn,
|
318 |
+
create_losses_fn=create_losses_fn,
|
319 |
+
create_optimizer_fn=functools.partial(
|
320 |
+
tf.keras.optimizers.Adam, learning_rate=learning_rate),
|
321 |
+
distributed_train_step_fn=_distributed_train_step,
|
322 |
+
eval_loop_fn=eval_loop_fn,
|
323 |
+
create_metrics_fn=create_metrics_fn,
|
324 |
+
eval_folder=eval_folder,
|
325 |
+
eval_datasets=eval_datasets,
|
326 |
+
summary_writer_fn=_summary_writer,
|
327 |
+
train_folder=train_folder,
|
328 |
+
saved_model_folder=saved_model_folder,
|
329 |
+
num_iterations=n_iterations,
|
330 |
+
save_summaries_frequency=3000,
|
331 |
+
save_checkpoint_frequency=3000)
|
332 |
+
|
333 |
+
|
334 |
+
def get_strategy(mode) -> tf.distribute.Strategy:
|
335 |
+
"""Creates a distributed strategy."""
|
336 |
+
strategy = None
|
337 |
+
if mode == 'cpu':
|
338 |
+
strategy = tf.distribute.OneDeviceStrategy('/cpu:0')
|
339 |
+
elif mode == 'gpu':
|
340 |
+
strategy = tf.distribute.MirroredStrategy()
|
341 |
+
else:
|
342 |
+
raise ValueError('Unsupported distributed mode.')
|
343 |
+
return strategy
|
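The core of `_train_step` above is a per-step weighted sum of losses: `loss = Σ_i w_i(step) · L_i(batch, predictions)`, where each `w_i` is a schedule sampled at the current iteration. A minimal sketch of that pattern with toy stand-ins (the real `(loss_fn, weight_schedule)` pairs come from `losses.training_losses()` and operate on batch/prediction dictionaries, not bare tensors):

```python
import tensorflow as tf

# Toy stand-ins for the (loss_fn, weight_schedule) pairs used in _train_step.
loss_functions = {
    'l1': (lambda y, p: tf.reduce_mean(tf.abs(y - p)),
           tf.keras.optimizers.schedules.PiecewiseConstantDecay(
               [0], [1.0, 1.0])),
    'aux': (lambda y, p: tf.reduce_mean(tf.square(y - p)),
            tf.keras.optimizers.schedules.PiecewiseConstantDecay(
                [1_500_000], [0.0, 40.0])),
}

def total_loss(y, p, iterations):
  # Weight each loss by its schedule sampled at the current iteration.
  terms = [fn(y, p) * weight(iterations)
           for fn, weight in loss_functions.values()]
  return tf.add_n(terms)

y = tf.ones((1, 8, 8, 3))
p = tf.zeros((1, 8, 8, 3))
print(float(total_loss(y, p, 0)))          # 1.0: only the 'l1' term contributes
print(float(total_loss(y, p, 2_000_000)))  # 41.0: 'aux' now weighted by 40
```

This is the same mechanism the gin configs rely on to phase the VGG and style losses in and out over the 3M-step run.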