HighCWu committed
Commit c68160d (1 parent: deb2950)

init commit.

.gitignore ADDED
@@ -0,0 +1,131 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ *.db
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2022 艾梦
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md CHANGED
@@ -10,4 +10,5 @@ pinned: false
  license: mit
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
+ # beat-interpolator
+ Interpolate the latents of your DL model to follow the beat of the music
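Each entry under `examples/models/` is a plugin package whose `__init__.py` exposes a `create` callable; `app.py` globs these packages and builds one Gradio tab from the attribute dict each `create()` returns. Below is a minimal, hypothetical sketch of that contract, mirroring the bundled MNIST and Fashion examples; the `my_model` name and the stand-in gray frames are illustrative only and not part of this commit.

```python
# examples/models/my_model/__init__.py (hypothetical) would re-export this as:
#     from .model import create_my_model_inference as create
import numpy as np

def create_my_model_inference():
    latent_dim = 128

    def generator(latents):
        # Receives a list of latent vectors of length `latent_dim` (numpy arrays)
        # and must return one uint8 HxWx3 frame per latent.
        return [np.full((64, 64, 3), 127, dtype=np.uint8) for _ in latents]

    return {
        'name': 'My Model',        # tab title shown in the Gradio app
        'generator': generator,    # list of latents -> list of frames
        'latent_dim': latent_dim,
        'fps': 15,                 # default FPS (also the max when the queue is enabled)
        'batch_size': 8,           # how many latents are passed to generator at once
        'strength': 0.6,           # default interpolation strength slider value
        'max_duration': 30,        # default clip length in seconds (max when queued)
        'use_peak': True           # default for the "Use peak" checkbox
    }
```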
app.py ADDED
@@ -0,0 +1,195 @@
+ #!/usr/bin/env python
+
+ from __future__ import annotations
+
+ import argparse
+ import os
+ import glob
+ import pickle
+ import sys
+ import importlib
+ from typing import List, Tuple
+
+ import gradio as gr
+ import numpy as np
+ import torch
+ import torch.nn as nn
+
+ from beat_interpolator import beat_interpolator
+
+
+ def build_models():
+     modules = glob.glob('examples/models/*')
+     modules = [
+         getattr(
+             importlib.import_module(
+                 module.replace('/', '.'),
+                 package=None
+             ),
+             'create'
+         )()
+         for module in modules
+         if '.py' not in module and '__' not in module
+     ]
+
+     attrs = [ (module['name'], module) for module in modules ]
+     mnist_idx = -1
+     for i in range(len(attrs)):
+         name, _ = attrs[i]
+         if name == 'MNIST':
+             mnist_idx = i
+     if mnist_idx > -1:
+         mnist_attr = attrs.pop(mnist_idx)
+         attrs.insert(0, mnist_attr)
+
+     return attrs
+
+
+ def parse_args() -> argparse.Namespace:
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--device', type=str, default='cpu')
+     parser.add_argument('--theme', type=str)
+     parser.add_argument('--share', action='store_true')
+     parser.add_argument('--port', type=int)
+     parser.add_argument('--disable-queue',
+                         dest='enable_queue',
+                         action='store_false')
+     return parser.parse_args()
+
+
+ def main():
+     args = parse_args()
+     enable_queue = args.enable_queue
+     model_attrs = build_models()
+
+     with gr.Blocks(theme=args.theme) as demo:
+         gr.Markdown('''<center><h1>Beat-Interpolator</h1></center>
+ <h2>Play DL models with music beats.</h2><br />
+ This is a Gradio Blocks app of <a href="https://github.com/HighCWu/beat-interpolator">HighCWu/beat-interpolator</a>.
+ ''')
+         with gr.Tabs():
+             for name, model_attr in model_attrs:
+                 with gr.TabItem(name):
+                     generator = model_attr['generator']
+                     latent_dim = model_attr['latent_dim']
+                     default_fps = model_attr['fps']
+                     max_fps = model_attr['fps'] if enable_queue else 60
+                     batch_size = model_attr['batch_size']
+                     strength = model_attr['strength']
+                     default_max_duration = model_attr['max_duration']
+                     max_duration = model_attr['max_duration'] if enable_queue else 360
+                     use_peak = model_attr['use_peak']
+
+                     def build_interpolate(
+                         generator,
+                         latent_dim,
+                         batch_size
+                     ):
+                         def interpolate(
+                             wave_path,
+                             seed,
+                             fps=default_fps,
+                             strength=strength,
+                             max_duration=default_max_duration,
+                             use_peak=use_peak):
+                             return beat_interpolator(
+                                 wave_path,
+                                 generator,
+                                 latent_dim,
+                                 int(seed),
+                                 int(fps),
+                                 batch_size,
+                                 strength,
+                                 max_duration,
+                                 use_peak)
+                         return interpolate
+
+                     interpolate = build_interpolate(generator, latent_dim, batch_size)
+
+                     with gr.Row():
+                         with gr.Box():
+                             with gr.Column():
+                                 with gr.Row():
+                                     wave_in = gr.Audio(
+                                         type="filepath",
+                                         label="Music"
+                                     )
+                                 # wave example not supported currently
+                                 # with gr.Row():
+                                 #     example_audios = gr.Dataset(
+                                 #         components=[wave_in],
+                                 #         samples=[['examples/example.mp3']]
+                                 #     )
+                                 #     example_audios.click(
+                                 #         fn=lambda examples: gr.Audio.update(value=examples[0]),
+                                 #         inputs=example_audios,
+                                 #         outputs=example_audios.components
+                                 #     )
+                                 with gr.Row():
+                                     gr.File(
+                                         value='examples/example.mp3',
+                                         interactive=False,
+                                         label='Example'
+                                     )
+                                 with gr.Row():
+                                     seed_in = gr.Number(
+                                         value=128,
+                                         label='Seed'
+                                     )
+                                 with gr.Row():
+                                     fps_in = gr.Slider(
+                                         value=default_fps,
+                                         minimum=4,
+                                         maximum=max_fps,
+                                         label="FPS"
+                                     )
+                                 with gr.Row():
+                                     strength_in = gr.Slider(
+                                         value=strength,
+                                         maximum=1,
+                                         label="Strength"
+                                     )
+                                 with gr.Row():
+                                     max_duration_in = gr.Slider(
+                                         value=default_max_duration,
+                                         minimum=5,
+                                         maximum=max_duration,
+                                         label="Max Duration"
+                                     )
+
+                                 with gr.Row():
+                                     peak_in = gr.Checkbox(value=use_peak, label="Use peak")
+
+                                 with gr.Row():
+                                     generate_button = gr.Button('Generate')
+
+                         with gr.Box():
+                             with gr.Column():
+                                 with gr.Row():
+                                     interpolated_video = gr.Video(label='Output Video')
+
+
+                     generate_button.click(interpolate,
+                                           inputs=[
+                                               wave_in,
+                                               seed_in,
+                                               fps_in,
+                                               strength_in,
+                                               max_duration_in,
+                                               peak_in
+                                           ],
+                                           outputs=[interpolated_video])
+
+         gr.Markdown(
+             '<center><img src="https://visitor-badge.glitch.me/badge?page_id=gradio-blocks.beat-interpolator" alt="visitor badge"/></center>'
+         )
+
+     demo.launch(
+         enable_queue=args.enable_queue,
+         server_port=args.port,
+         share=args.share,
+     )
+
+
+ if __name__ == '__main__':
+     main()
beat_interpolator.py ADDED
@@ -0,0 +1,121 @@
+ import librosa
+ import numpy as np
+ import gradio as gr
+ import soundfile as sf
+
+ from moviepy.editor import *
+
+
+ cache_wav_path = [f'/tmp/{str(i).zfill(2)}.wav' for i in range(50)]
+ wave_path_iter = iter(cache_wav_path)
+ cache_mp4_path = [f'/tmp/{str(i).zfill(2)}.mp4' for i in range(50)]
+ path_iter = iter(cache_mp4_path)
+
+ def merge_times(times, times2):
+     ids = np.unique(np.where(abs(times2[...,None] - times[None]) < 0.2)[1])
+     mask = np.ones_like(times, dtype=bool)
+     mask[ids] = False
+     times = times[mask]
+     times = np.concatenate([times, times2])
+     times = np.sort(times)
+
+     return times
+
+
+ def beat_interpolator(wave_path, generator, latent_dim, seed, fps=30, batch_size=1, strength=1, max_duration=None, use_peak=False):
+     fps = max(10, fps)
+     strength = np.clip(strength, 0, 1)
+     hop_length = 512
+     y, sr = librosa.load(wave_path, sr=24000)
+     duration = librosa.get_duration(y=y, sr=sr)
+
+     if max_duration is not None:
+         y_len = y.shape[0]
+         y_idx = int(y_len * max_duration / duration)
+         y = y[:y_idx]
+
+     global wave_path_iter
+     try:
+         wave_path = next(wave_path_iter)
+     except StopIteration:
+         wave_path_iter = iter(cache_wav_path)
+         wave_path = next(wave_path_iter)
+     sf.write(wave_path, y, sr, subtype='PCM_24')
+     y, sr = librosa.load(wave_path, sr=24000)
+     duration = librosa.get_duration(y=y, sr=sr)
+
+     S = np.abs(librosa.stft(y))
+     db = librosa.power_to_db(S**2, ref=np.median).max(0)
+     db_mean = np.mean(db)
+     db_max = np.max(db)
+     db_min = np.min(db)
+     db_times = librosa.frames_to_time(np.arange(len(db)), sr=sr, hop_length=hop_length)
+     rng = np.random.RandomState(seed)
+     onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=512, aggregate=np.median)
+     _, beats = librosa.beat.beat_track(y=y, sr=sr, onset_envelope=onset_env, hop_length=512, units='time')
+     times = np.asarray(beats)
+     if use_peak:
+         peaks = librosa.util.peak_pick(onset_env, 1, 1, 1, 1, 0.8, 5)
+         times2 = librosa.frames_to_time(np.arange(len(onset_env)), sr=sr, hop_length=512)[peaks]
+         times2 = np.asarray(times2)
+         times = merge_times(times, times2)
+
+     times = np.concatenate([np.asarray([0.]), times], 0)
+     times = list(np.unique(np.int64(np.floor(times * fps / 2))) * 2)
+
+     latents = []
+     time0 = 0
+     latent0 = rng.randn(latent_dim)
+     for time1 in times:
+         latent1 = rng.randn(latent_dim)
+         db_cur_index = np.argmin(np.abs(db_times - time1.astype('float32') / fps))
+         db_cur = db[db_cur_index]
+         if db_cur < db_min + (db_mean - db_min) / 3:
+             latent1 = latent0 * 0.8 + latent1 * 0.2
+         elif db_cur < db_min + 2 * (db_mean - db_min) / 3:
+             latent1 = latent0 * 0.6 + latent1 * 0.4
+         elif db_cur < db_mean + (db_max - db_mean) / 3:
+             latent1 = latent0 * 0.4 + latent1 * 0.6
+         elif db_cur < db_mean + 2 * (db_max - db_mean) / 3:
+             latent1 = latent0 * 0.2 + latent1 * 0.8
+         else:
+             pass
+         if time1 > duration * fps:
+             time1 = int(duration * fps)
+         t1 = time1 - time0
+         alpha = 0.5 * strength
+         latent2 = latent0 * alpha + latent1 * (1 - alpha)
+         for j in range(t1):
+             alpha = j / t1
+             latent = latent0 * (1 - alpha) + latent2 * alpha
+             latents.append(latent)
+
+         time0 = time1
+         latent0 = latent1
+
+     outs = []
+     ix = 0
+     while True:
+         if ix + batch_size <= len(latents):
+             outs += generator(latents[ix:ix+batch_size])
+         elif ix < len(latents):
+             outs += generator(latents[ix:])
+             break
+         else:
+             break
+         ix += batch_size
+
+     global path_iter
+     try:
+         video_path = next(path_iter)
+     except StopIteration:
+         path_iter = iter(cache_mp4_path)
+         video_path = next(path_iter)
+
+     video = ImageSequenceClip(outs, fps=fps)
+     audioclip = AudioFileClip(wave_path)
+
+     video = video.set_audio(audioclip)
+     video.write_videofile(video_path, fps=fps)
+
+     return video_path
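`beat_interpolator` can also be driven outside the Gradio UI. A minimal sketch, assuming it is run from the repository root so the bundled example packages resolve; paths and values here are illustrative.

```python
from beat_interpolator import beat_interpolator
from examples.models.mnist import create   # bundled MNIST example plugin

attrs = create()
video_path = beat_interpolator(
    'examples/example.mp3',       # any audio file librosa can load
    attrs['generator'],
    attrs['latent_dim'],
    seed=128,
    fps=attrs['fps'],
    batch_size=attrs['batch_size'],
    strength=attrs['strength'],
    max_duration=attrs['max_duration'],
    use_peak=attrs['use_peak'],
)
print(video_path)                 # one of the cached /tmp/XX.mp4 paths
```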
examples/__init__.py ADDED
File without changes
examples/example.mp3 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8afffc71afc7b665cf52c5425a85db533bc4b4b0ea878a6812bcb2a99941e5a3
+ size 962186
examples/models/__init__.py ADDED
File without changes
examples/models/anime_biggan/__init__.py ADDED
@@ -0,0 +1 @@
+ from .model import create_anime_biggan_inference as create
examples/models/anime_biggan/model.py ADDED
@@ -0,0 +1,437 @@
+ import numpy as np
+ import torch
+ from torch import nn
+ from torch.nn import Parameter
+ from torch.nn import functional as F
+ from huggingface_hub import hf_hub_download
+
+
+ def l2_normalize(v, dim=None, eps=1e-12):
+     return v / (v.norm(dim=dim, keepdim=True) + eps)
+
+
+ def unpool(value):
+     """Unpooling operation.
+     N-dimensional version of the unpooling operation from
+     https://www.robots.ox.ac.uk/~vgg/rg/papers/Dosovitskiy_Learning_to_Generate_2015_CVPR_paper.pdf
+     Taken from: https://github.com/tensorflow/tensorflow/issues/2169
+     Args:
+         value: a Tensor of shape [b, d0, d1, ..., dn, ch]
+         name: name of the op
+     Returns:
+         A Tensor of shape [b, 2*d0, 2*d1, ..., 2*dn, ch]
+     """
+     value = torch.Tensor.permute(value, [0,2,3,1])
+     sh = list(value.shape)
+     dim = len(sh[1:-1])
+     out = (torch.reshape(value, [-1] + sh[-dim:]))
+     for i in range(dim, 0, -1):
+         out = torch.cat([out, torch.zeros_like(out)], i)
+     out_size = [-1] + [s * 2 for s in sh[1:-1]] + [sh[-1]]
+     out = torch.reshape(out, out_size)
+     out = torch.Tensor.permute(out, [0,3,1,2])
+     return out
+
+
+ class BatchNorm2d(nn.BatchNorm2d):
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.initialized = False
+         self.accumulating = False
+         self.accumulated_mean = Parameter(torch.zeros(args[0]), requires_grad=False)
+         self.accumulated_var = Parameter(torch.zeros(args[0]), requires_grad=False)
+         self.accumulated_counter = Parameter(torch.zeros(1)+1e-12, requires_grad=False)
+
+     def forward(self, inputs, *args, **kwargs):
+         if not self.initialized:
+             self.check_accumulation()
+             self.set_initialized(True)
+         if self.accumulating:
+             self.eval()
+             with torch.no_grad():
+                 axes = [0] + ([] if len(inputs.shape) == 2 else list(range(2,len(inputs.shape))))
+                 _mean = torch.mean(inputs, axes, keepdim=True)
+                 mean = torch.mean(inputs, axes, keepdim=False)
+                 var = torch.mean((inputs-_mean)**2, axes)
+                 self.accumulated_mean.copy_(self.accumulated_mean + mean)
+                 self.accumulated_var.copy_(self.accumulated_var + var)
+                 self.accumulated_counter.copy_(self.accumulated_counter + 1)
+                 _mean = self.running_mean*1.0
+                 _variance = self.running_var*1.0
+                 self.running_mean.copy_(self.accumulated_mean / self.accumulated_counter)
+                 self.running_var.copy_(self.accumulated_var / self.accumulated_counter)
+                 out = super().forward(inputs, *args, **kwargs)
+                 self.running_mean.copy_(_mean)
+                 self.running_var.copy_(_variance)
+                 return out
+         out = super().forward(inputs, *args, **kwargs)
+         return out
+
+     def check_accumulation(self):
+         if self.accumulated_counter.detach().cpu().numpy().mean() > 1-1e-12:
+             self.running_mean.copy_(self.accumulated_mean / self.accumulated_counter)
+             self.running_var.copy_(self.accumulated_var / self.accumulated_counter)
+             return True
+         return False
+
+     def clear_accumulated(self):
+         self.accumulated_mean.copy_(self.accumulated_mean*0.0)
+         self.accumulated_var.copy_(self.accumulated_var*0.0)
+         self.accumulated_counter.copy_(self.accumulated_counter*0.0+1e-2)
+
+     def set_accumulating(self, status=True):
+         if status:
+             self.accumulating = True
+         else:
+             self.accumulating = False
+
+     def set_initialized(self, status=False):
+         if not status:
+             self.initialized = False
+         else:
+             self.initialized = True
+
+
+ class SpectralNorm(nn.Module):
+     def __init__(self, module, name='weight', power_iterations=2):
+         super().__init__()
+         self.module = module
+         self.name = name
+         self.power_iterations = power_iterations
+         if not self._made_params():
+             self._make_params()
+
+     def _update_u(self):
+         w = self.weight
+         u = self.weight_u
+
+         if len(w.shape) == 4:
+             _w = torch.Tensor.permute(w, [2,3,1,0])
+             _w = torch.reshape(_w, [-1, _w.shape[-1]])
+         elif isinstance(self.module, nn.Linear) or isinstance(self.module, nn.Embedding):
+             _w = torch.Tensor.permute(w, [1,0])
+             _w = torch.reshape(_w, [-1, _w.shape[-1]])
+         else:
+             _w = torch.reshape(w, [-1, w.shape[-1]])
+             _w = torch.reshape(_w, [-1, _w.shape[-1]])
+         singular_value = "left" if _w.shape[0] <= _w.shape[1] else "right"
+         norm_dim = 0 if _w.shape[0] <= _w.shape[1] else 1
+         for _ in range(self.power_iterations):
+             if singular_value == "left":
+                 v = l2_normalize(torch.matmul(_w.t(), u), dim=norm_dim)
+                 u = l2_normalize(torch.matmul(_w, v), dim=norm_dim)
+             else:
+                 v = l2_normalize(torch.matmul(u, _w.t()), dim=norm_dim)
+                 u = l2_normalize(torch.matmul(v, _w), dim=norm_dim)
+
+         if singular_value == "left":
+             sigma = torch.matmul(torch.matmul(u.t(), _w), v)
+         else:
+             sigma = torch.matmul(torch.matmul(v, _w), u.t())
+         _w = w / sigma.detach()
+         setattr(self.module, self.name, _w)
+         self.weight_u.copy_(u.detach())
+
+     def _made_params(self):
+         try:
+             self.weight
+             self.weight_u
+             return True
+         except AttributeError:
+             return False
+
+     def _make_params(self):
+         w = getattr(self.module, self.name)
+
+         if len(w.shape) == 4:
+             _w = torch.Tensor.permute(w, [2,3,1,0])
+             _w = torch.reshape(_w, [-1, _w.shape[-1]])
+         elif isinstance(self.module, nn.Linear) or isinstance(self.module, nn.Embedding):
+             _w = torch.Tensor.permute(w, [1,0])
+             _w = torch.reshape(_w, [-1, _w.shape[-1]])
+         else:
+             _w = torch.reshape(w, [-1, w.shape[-1]])
+         singular_value = "left" if _w.shape[0] <= _w.shape[1] else "right"
+         norm_dim = 0 if _w.shape[0] <= _w.shape[1] else 1
+         u_shape = (_w.shape[0], 1) if singular_value == "left" else (1, _w.shape[-1])
+
+         u = Parameter(w.data.new(*u_shape).normal_(0, 1), requires_grad=False)
+         u.copy_(l2_normalize(u, dim=norm_dim).detach())
+
+         del self.module._parameters[self.name]
+         self.weight = w
+         self.weight_u = u
+
+     def forward(self, *args, **kwargs):
+         self._update_u()
+         return self.module.forward(*args, **kwargs)
+
+
+ class SelfAttention(nn.Module):
+     def __init__(self, in_dim, activation=torch.relu):
+         super().__init__()
+         self.chanel_in = in_dim
+         self.activation = activation
+
+         self.theta = SpectralNorm(nn.Conv2d(in_dim, in_dim // 8, 1, bias=False))
+         self.phi = SpectralNorm(nn.Conv2d(in_dim, in_dim // 8, 1, bias=False))
+         self.pool = nn.MaxPool2d(2, 2)
+         self.g = SpectralNorm(nn.Conv2d(in_dim, in_dim // 2, 1, bias=False))
+         self.o_conv = SpectralNorm(nn.Conv2d(in_dim // 2, in_dim, 1, bias=False))
+         self.gamma = Parameter(torch.zeros(1))
+
+     def forward(self, x):
+         m_batchsize, C, width, height = x.shape
+         N = height * width
+
+         theta = self.theta(x)
+         phi = self.phi(x)
+         phi = self.pool(phi)
+         phi = torch.reshape(phi,(m_batchsize, -1, N // 4))
+         theta = torch.reshape(theta,(m_batchsize, -1, N))
+         theta = torch.Tensor.permute(theta,(0, 2, 1))
+         attention = torch.softmax(torch.bmm(theta, phi), -1)
+         g = self.g(x)
+         g = torch.reshape(self.pool(g),(m_batchsize, -1, N // 4))
+         attn_g = torch.reshape(torch.bmm(g, torch.Tensor.permute(attention,(0, 2, 1))),(m_batchsize, -1, width, height))
+         out = self.o_conv(attn_g)
+         return self.gamma * out + x
+
+
+ class ConditionalBatchNorm2d(nn.Module):
+     def __init__(self, num_features, num_classes, eps=1e-5, momentum=0.1):
+         super().__init__()
+         self.bn_in_cond = BatchNorm2d(num_features, affine=False, eps=eps, momentum=momentum)
+         self.gamma_embed = SpectralNorm(nn.Linear(num_classes, num_features, bias=False))
+         self.beta_embed = SpectralNorm(nn.Linear(num_classes, num_features, bias=False))
+
+     def forward(self, x, y):
+         out = self.bn_in_cond(x)
+
+         if isinstance(y, list):
+             gamma, beta = y
+             out = torch.reshape(gamma, (gamma.shape[0], -1, 1, 1)) * out + torch.reshape(beta, (beta.shape[0], -1, 1, 1))
+             return out
+
+         gamma = self.gamma_embed(y)
+         # gamma = gamma + 1
+         beta = self.beta_embed(y)
+         out = torch.reshape(gamma, (gamma.shape[0], -1, 1, 1)) * out + torch.reshape(beta, (beta.shape[0], -1, 1, 1))
+         return out
+
+
+ class ResBlock(nn.Module):
+     def __init__(
+         self,
+         in_channel,
+         out_channel,
+         kernel_size=[3, 3],
+         padding=1,
+         stride=1,
+         n_class=None,
+         conditional=True,
+         activation=torch.relu,
+         upsample=True,
+         downsample=False,
+         z_dim=128,
+         use_attention=False,
+         skip_proj=None
+     ):
+         super().__init__()
+
+         if conditional:
+             self.cond_norm1 = ConditionalBatchNorm2d(in_channel, z_dim)
+
+         self.conv0 = SpectralNorm(
+             nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding)
+         )
+
+         if conditional:
+             self.cond_norm2 = ConditionalBatchNorm2d(out_channel, z_dim)
+
+         self.conv1 = SpectralNorm(
+             nn.Conv2d(out_channel, out_channel, kernel_size, stride, padding)
+         )
+
+         self.skip_proj = False
+         if skip_proj is not True and (upsample or downsample):
+             self.conv_sc = SpectralNorm(nn.Conv2d(in_channel, out_channel, 1, 1, 0))
+             self.skip_proj = True
+
+         if use_attention:
+             self.attention = SelfAttention(out_channel)
+
+         self.upsample = upsample
+         self.downsample = downsample
+         self.activation = activation
+         self.conditional = conditional
+         self.use_attention = use_attention
+
+     def forward(self, input, condition=None):
+         out = input
+
+         if self.conditional:
+             out = self.cond_norm1(out, condition if not isinstance(condition, list) else condition[0])
+         out = self.activation(out)
+         if self.upsample:
+             out = unpool(out) # out = F.interpolate(out, scale_factor=2)
+         out = self.conv0(out)
+         if self.conditional:
+             out = self.cond_norm2(out, condition if not isinstance(condition, list) else condition[1])
+         out = self.activation(out)
+         out = self.conv1(out)
+
+         if self.downsample:
+             out = F.avg_pool2d(out, 2, 2)
+
+         if self.skip_proj:
+             skip = input
+             if self.upsample:
+                 skip = unpool(skip) # skip = F.interpolate(skip, scale_factor=2)
+             skip = self.conv_sc(skip)
+             if self.downsample:
+                 skip = F.avg_pool2d(skip, 2, 2)
+             out = out + skip
+         else:
+             skip = input
+
+         if self.use_attention:
+             out = self.attention(out)
+
+         return out
+
+
+ class Generator(nn.Module):
+     def __init__(self, code_dim=128, n_class=1000, chn=96, blocks_with_attention="B4", resolution=512):
+         super().__init__()
+
+         def GBlock(in_channel, out_channel, n_class, z_dim, use_attention):
+             return ResBlock(in_channel, out_channel, n_class=n_class, z_dim=z_dim, use_attention=use_attention)
+
+         self.embed_y = nn.Linear(n_class, 128, bias=False)
+
+         self.chn = chn
+         self.resolution = resolution
+         self.blocks_with_attention = set(blocks_with_attention.split(","))
+         self.blocks_with_attention.discard('')
+
+         gblock = []
+         in_channels, out_channels = self.get_in_out_channels()
+         self.num_split = len(in_channels) + 1
+
+         z_dim = code_dim//self.num_split + 128
+         self.noise_fc = SpectralNorm(nn.Linear(code_dim//self.num_split, 4 * 4 * in_channels[0]))
+
+         self.sa_ids = [int(s.split('B')[-1]) for s in self.blocks_with_attention]
+
+         for i, (nc_in, nc_out) in enumerate(zip(in_channels, out_channels)):
+             gblock.append(GBlock(nc_in, nc_out, n_class=n_class, z_dim=z_dim, use_attention=(i+1) in self.sa_ids))
+         self.blocks = nn.ModuleList(gblock)
+
+         self.output_layer_bn = BatchNorm2d(1 * chn, eps=1e-5)
+         self.output_layer_conv = SpectralNorm(nn.Conv2d(1 * chn, 3, [3, 3], padding=1))
+
+         self.z_dim = code_dim
+         self.c_dim = n_class
+         self.n_level = self.num_split
+
+     def get_in_out_channels(self):
+         resolution = self.resolution
+         if resolution == 1024:
+             channel_multipliers = [16, 16, 8, 8, 4, 2, 1, 1, 1]
+         elif resolution == 512:
+             channel_multipliers = [16, 16, 8, 8, 4, 2, 1, 1]
+         elif resolution == 256:
+             channel_multipliers = [16, 16, 8, 8, 4, 2, 1]
+         elif resolution == 128:
+             channel_multipliers = [16, 16, 8, 4, 2, 1]
+         elif resolution == 64:
+             channel_multipliers = [16, 16, 8, 4, 2]
+         elif resolution == 32:
+             channel_multipliers = [4, 4, 4, 4]
+         else:
+             raise ValueError("Unsupported resolution: {}".format(resolution))
+         in_channels = [self.chn * c for c in channel_multipliers[:-1]]
+         out_channels = [self.chn * c for c in channel_multipliers[1:]]
+         return in_channels, out_channels
+
+     def forward(self, input, class_id):
+         codes = torch.chunk(input, self.num_split, 1)
+         class_emb = self.embed_y(class_id) # 128
+         out = self.noise_fc(codes[0])
+         out = torch.Tensor.permute(torch.reshape(out,(out.shape[0], 4, 4, -1)),(0, 3, 1, 2))
+         for i, (code, gblock) in enumerate(zip(codes[1:], self.blocks)):
+             condition = torch.cat([code, class_emb], 1)
+             out = gblock(out, condition)
+
+         out = self.output_layer_bn(out)
+         out = torch.relu(out)
+         out = self.output_layer_conv(out)
+
+         return (torch.tanh(out) + 1) / 2
+
+     def forward_w(self, ws):
+         out = self.noise_fc(ws[0])
+         out = torch.Tensor.permute(torch.reshape(out,(out.shape[0], 4, 4, -1)),(0, 3, 1, 2))
+         for i, (w, gblock) in enumerate(zip(ws[1:], self.blocks)):
+             out = gblock(out, w)
+
+         out = self.output_layer_bn(out)
+         out = torch.relu(out)
+         out = self.output_layer_conv(out)
+
+         return (torch.tanh(out) + 1) / 2
+
+     def forward_wp(self, z0, gammas, betas):
+         out = self.noise_fc(z0)
+         out = torch.Tensor.permute(torch.reshape(out,(out.shape[0], 4, 4, -1)),(0, 3, 1, 2))
+         for i, (gamma, beta, gblock) in enumerate(zip(gammas, betas, self.blocks)):
+             out = gblock(out, [[gamma[0], beta[0]], [gamma[1], beta[1]]])
+
+         out = self.output_layer_bn(out)
+         out = torch.relu(out)
+         out = self.output_layer_conv(out)
+
+         return (torch.tanh(out) + 1) / 2
+
+
+
+ def create_anime_biggan_inference():
+     device = 'cuda' if torch.cuda.is_available() else 'cpu'
+     anime_biggan = Generator(
+         code_dim=140, n_class=1000, chn=96,
+         blocks_with_attention="B5", resolution=256
+     )
+     state = torch.load(
+         hf_hub_download('HighCWu/anime-biggan-pytorch',
+                         f'pytorch_model.bin'),
+         map_location='cpu'
+     )
+     anime_biggan.load_state_dict(state)
+     anime_biggan.to(device)
+     anime_biggan.eval()
+
+     @torch.inference_mode()
+     def anime_biggan_generator(latents):
+         latents = [torch.from_numpy(latent).float().to(device) for latent in latents]
+         latents = torch.stack(latents)
+         label = torch.zeros([latents.shape[0], anime_biggan.c_dim], device=device)
+         label[:,0] = 1
+         out = anime_biggan(latents, label)
+         outs = []
+         for out_i in out:
+             out_i = (out_i.permute(1,2,0) * 255).clamp(0,255).cpu().numpy()
+             out_i = np.uint8(out_i)
+             outs.append(out_i)
+         return outs
+
+     return {
+         'name': 'Anime Biggan',
+         'generator': anime_biggan_generator,
+         'latent_dim': anime_biggan.z_dim,
+         'fps': 5,
+         'batch_size': 1,
+         'strength': 0.45,
+         'max_duration': 15,
+         'use_peak': True
+     }
examples/models/celeba256/__init__.py ADDED
@@ -0,0 +1 @@
+ from .model import create_celeba256_inference as create
examples/models/celeba256/model.py ADDED
@@ -0,0 +1,37 @@
+ import torch
+ import numpy as np
+
+
+ def create_celeba256_inference():
+     device = 'cuda' if torch.cuda.is_available() else 'cpu'
+     use_gpu = True if torch.cuda.is_available() else False
+     celeba256 = torch.hub.load(
+         'facebookresearch/pytorch_GAN_zoo:hub',
+         'PGAN',
+         model_name='celebAHQ-256',
+         pretrained=True,
+         useGPU=use_gpu
+     )
+     celeba256_noise, _ = celeba256.buildNoiseData(1)
+     @torch.inference_mode()
+     def celeba256_generator(latents):
+         latents = [torch.from_numpy(latent).float().to(device) for latent in latents]
+         latents = torch.stack(latents)
+         out = celeba256.test(latents)
+         outs = []
+         for out_i in out:
+             out_i = ((out_i.permute(1,2,0) + 1) * 127.5).clamp(0,255).cpu().numpy()
+             out_i = np.uint8(out_i)
+             outs.append(out_i)
+         return outs
+
+     return {
+         'name': 'Celeba256',
+         'generator': celeba256_generator,
+         'latent_dim': celeba256_noise.shape[1],
+         'fps': 5,
+         'batch_size': 1,
+         'strength': 0.6,
+         'max_duration': 20,
+         'use_peak': True
+     }
examples/models/fashion/__init__.py ADDED
@@ -0,0 +1 @@
+ from .model import create_fashion_inference as create
examples/models/fashion/model.py ADDED
@@ -0,0 +1,31 @@
+ import torch
+ import numpy as np
+
+
+ def create_fashion_inference():
+     device = 'cuda' if torch.cuda.is_available() else 'cpu'
+     use_gpu = True if torch.cuda.is_available() else False
+     fashion = torch.hub.load('facebookresearch/pytorch_GAN_zoo:hub', 'DCGAN', pretrained=True, useGPU=use_gpu)
+     fashion_noise, _ = fashion.buildNoiseData(1)
+     @torch.inference_mode()
+     def fashion_generator(latents):
+         latents = [torch.from_numpy(latent).float().to(device) for latent in latents]
+         latents = torch.stack(latents)
+         out = fashion.test(latents)
+         outs = []
+         for out_i in out:
+             out_i = ((out_i.permute(1,2,0) + 1) * 127.5).clamp(0,255).cpu().numpy()
+             out_i = np.uint8(out_i)
+             outs.append(out_i)
+         return outs
+
+     return {
+         'name': 'Fashion',
+         'generator': fashion_generator,
+         'latent_dim': fashion_noise.shape[1],
+         'fps': 15,
+         'batch_size': 8,
+         'strength': 0.6,
+         'max_duration': 30,
+         'use_peak': True
+     }
examples/models/mnist/__init__.py ADDED
@@ -0,0 +1 @@
+ from .model import create_mnist_inference as create
examples/models/mnist/mnist_generator.pretrained ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f6628c922425612cf21f48ed3325310c51441b279a86296fd0fa7041451296b
+ size 2268434
examples/models/mnist/model.py ADDED
@@ -0,0 +1,69 @@
+ import os
+ import numpy as np
+ import torch
+ import torch.nn as nn
+
+
+ class Generator(nn.Module):
+     '''Refer to https://github.com/safwankdb/Vanilla-GAN'''
+     def __init__(self):
+         super(Generator, self).__init__()
+         self.n_features = 128
+         self.n_out = 784
+         self.fc0 = nn.Sequential(
+             nn.Linear(self.n_features, 256),
+             nn.LeakyReLU(0.2)
+         )
+         self.fc1 = nn.Sequential(
+             nn.Linear(256, 512),
+             nn.LeakyReLU(0.2)
+         )
+         self.fc2 = nn.Sequential(
+             nn.Linear(512, 784),
+             nn.Tanh()
+         )
+     def forward(self, x):
+         x = self.fc0(x)
+         x = self.fc1(x)
+         x = self.fc2(x)
+         x = x.view(-1, 1, 28, 28)
+         return x
+
+
+ def create_mnist_inference():
+     device = 'cuda' if torch.cuda.is_available() else 'cpu'
+     mnist = Generator()
+     state = torch.load(
+         os.path.join(
+             os.path.dirname(__file__),
+             'mnist_generator.pretrained'
+         ),
+         map_location='cpu'
+     )
+     mnist.load_state_dict(state)
+     mnist.to(device)
+     mnist.eval()
+
+     @torch.inference_mode()
+     def mnist_generator(latents):
+         latents = [torch.from_numpy(latent).float().to(device) for latent in latents]
+         latents = torch.stack(latents)
+         out = mnist(latents)
+         outs = []
+         for out_i in out:
+             out_i = ((out_i[0] + 1) * 127.5).clamp(0,255).cpu().numpy()
+             out_i = np.uint8(out_i)
+             out_i = np.stack([out_i]*3, -1)
+             outs.append(out_i)
+         return outs
+
+     return {
+         'name': 'MNIST',
+         'generator': mnist_generator,
+         'latent_dim': 128,
+         'fps': 20,
+         'batch_size': 8,
+         'strength': 0.75,
+         'max_duration': 30,
+         'use_peak': True
+     }
packages.txt ADDED
@@ -0,0 +1,3 @@
+ liblzma-dev
+ libsndfile1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ gradio==3.0.4
+ huggingface-hub==0.6.0
+ moviepy==1.0.3
+ Pillow==9.0.1
+ torch==1.11.0
+ torchvision==0.12.0
+ librosa
+ soundfile