"""Abstract VAE model class.
|
|
|
|
Gaussian encoder and decoder (the latter assumed to have constant variance).
|
|
|
|
Inspiration drawn from https://github.com/pytorch/examples/tree/main/vae.
|
|
"""

import abc
from typing import Optional, Mapping

import flax.linen as nn
import jax
import jax.numpy as jnp


class Model(nn.Module, metaclass=abc.ABCMeta):
  """Abstract VAE model class."""

  codeword_dim: Optional[int] = None
  code_len: int = 256
  code_dropout: str = "none"

  @abc.abstractmethod
  def encode(
      self,
      x: jax.Array,
      *,
      train: bool = False,
  ) -> tuple[jax.Array, jax.Array]:
    """Encodes x into the mean and log-variance of q(z|x)."""
    ...

  def reparametrize(
      self,
      mu: jax.Array,
      logvar: jax.Array,
      rng: jax.Array | None = None,
  ) -> jax.Array:
    """Samples mu + std * eps with eps ~ N(0, I) (reparametrization trick)."""
    std = jnp.exp(0.5 * logvar)
    if rng is None:
      rng = self.make_rng("dropout")
    eps = jax.random.normal(rng, shape=std.shape, dtype=std.dtype)
    return mu + std * eps

  @abc.abstractmethod
  def decode(
      self, x: jax.Array,
      train: bool = False,
  ) -> jax.Array | Mapping[str, jax.Array]:
    """Decodes latent code x into data space (or a mapping of outputs)."""
    ...

  def code_dropout_fn(self, z: jax.Array, *, train: bool = False) -> jax.Array:
    """Applies sequential or random code dropout to z during training."""
    assert self.code_dropout in ["none", "seq", "random"]
    if train and self.code_dropout != "none":
      # Per-position keep score, decaying linearly from ~1 to ~0 over the code
      # length (the exact endpoints 1.0 and 0.0 are excluded).
      importance = jnp.linspace(1.0, 0.0, self.code_len + 2)[1:-1]
      # One uniform threshold per example; positions whose score exceeds it are
      # kept, so "seq" dropout keeps a random-length prefix of the code.
      thr = jax.random.uniform(self.make_rng("dropout"), z.shape[:1])
      mask = importance[None, :] > thr[:, None]
      if self.code_dropout == "random":
        # Shuffle the mask per example so a random subset (not a prefix) is kept.
        mask = jax.random.permutation(
            self.make_rng("dropout"), mask, axis=-1, independent=True)
      z = z * mask[:, :, None]
    return z

  def __call__(
      self,
      x: jax.Array,
      *,
      train: bool = False,
  ) -> tuple[jax.Array | Mapping[str, jax.Array], Mapping[str, jax.Array]]:
    mu, logvar = self.encode(x, train=train)

    # Sample via the reparametrization trick when training; use the mean at
    # eval time.
    if train:
      z = self.reparametrize(mu, logvar)
    else:
      z = mu
    z = self.code_dropout_fn(z, train=train)
    x = self.decode(z, train=train)
    return x, {"mu": mu, "logvar": logvar, "z": z}
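

# -----------------------------------------------------------------------------
# Illustrative sketch only (not part of the original module): a minimal
# concrete subclass and usage example showing one way the abstract `Model`
# interface above could be implemented and called. The class name `_ToyVAE`,
# the Dense layer sizes, and the input shapes below are assumptions made for
# this sketch.
# -----------------------------------------------------------------------------
class _ToyVAE(Model):
  """Tiny MLP VAE used only to demonstrate the `Model` interface."""

  def setup(self):
    d = self.codeword_dim or 4
    self.enc = nn.Dense(2 * self.code_len * d)  # Predicts (mu, logvar) jointly.
    self.dec = nn.Dense(16)  # Maps the flattened code back to data space.

  def encode(self, x, *, train=False):
    d = self.codeword_dim or 4
    out = self.enc(x).reshape(x.shape[0], self.code_len, 2 * d)
    mu, logvar = jnp.split(out, 2, axis=-1)  # Each of shape (B, code_len, d).
    return mu, logvar

  def decode(self, x, train=False):
    return self.dec(x.reshape(x.shape[0], -1))


if __name__ == "__main__":
  model = _ToyVAE(code_len=8, codeword_dim=4)
  x = jnp.ones((2, 16))
  # train=False at init time, so no "dropout" rng stream is required yet.
  params = model.init(jax.random.PRNGKey(0), x)
  # Training-mode call: the reparametrization (and code dropout, if enabled)
  # draws from the "dropout" rng stream.
  x_rec, stats = model.apply(
      params, x, train=True, rngs={"dropout": jax.random.PRNGKey(1)})
  # A typical objective for a Gaussian encoder and constant-variance Gaussian
  # decoder: MSE reconstruction plus the closed-form KL to a standard normal.
  rec_loss = jnp.mean((x_rec - x) ** 2)
  kl = -0.5 * jnp.mean(
      1.0 + stats["logvar"] - stats["mu"] ** 2 - jnp.exp(stats["logvar"]))
  print(rec_loss, kl)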