NeverMore0123 committed on
Commit 2c4bb7c · 1 Parent(s): 11c8f9e

update all relative imports

This view is limited to 50 files because it contains too many changes. See raw diff.
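The change applied across all 50 files is the standard absolute-to-relative import rewrite. A minimal before/after sketch, assuming the modules sit side by side inside the AutoregressiveVideo2WorldGeneration package:

# Before: absolute import; only works when the package is importable
# under the exact top-level name AutoregressiveVideo2WorldGeneration.
from AutoregressiveVideo2WorldGeneration.ar_config_base_model import ModelConfig

# After: relative import, resolved against this file's own package,
# so the directory can be renamed or vendored without further edits.
from .ar_config_base_model import ModelConfig

One caveat: relative imports resolve only when a file is imported as part of a package, so running one of these modules directly (python ar_model.py) raises ImportError; entry points must go through python -m or an installed package.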
Files changed (50)
  1. ar_config_base_model.py +1 -1
  2. ar_config_base_model_config.py +6 -6
  3. ar_config_base_tokenizer.py +4 -4
  4. ar_config_inference_inference_config.py +1 -1
  5. ar_diffusion_decoder_config_base_conditioner.py +4 -4
  6. ar_diffusion_decoder_config_config_latent_diffusion_decoder.py +5 -5
  7. ar_diffusion_decoder_config_inference_cosmos_diffusiondecoder_7b.py +3 -3
  8. ar_diffusion_decoder_config_registry.py +4 -4
  9. ar_diffusion_decoder_inference.py +4 -4
  10. ar_diffusion_decoder_model.py +5 -5
  11. ar_diffusion_decoder_network.py +2 -2
  12. ar_model.py +10 -10
  13. ar_module_attention.py +2 -2
  14. ar_network_transformer.py +7 -7
  15. ar_network_vit.py +3 -3
  16. ar_tokenizer_discrete_video.py +1 -1
  17. ar_tokenizer_image_text_tokenizer.py +2 -2
  18. ar_tokenizer_modules.py +3 -3
  19. ar_tokenizer_networks.py +3 -3
  20. ar_tokenizer_quantizers.py +1 -1
  21. ar_tokenizer_text_tokenizer.py +1 -1
  22. ar_tokenizer_tokenizer.py +2 -2
  23. ar_utils_inference.py +2 -2
  24. ar_utils_sampling.py +1 -1
  25. base.py +3 -3
  26. base_world_generation_pipeline.py +2 -2
  27. config.py +2 -2
  28. config_helper.py +2 -2
  29. cosmos1/models/autoregressive/nemo/cosmos.py +1 -1
  30. cosmos1/models/autoregressive/nemo/inference/general.py +3 -3
  31. cosmos1/models/autoregressive/nemo/post_training/prepare_dataset.py +2 -2
  32. cosmos1/models/autoregressive/nemo/utils.py +6 -6
  33. cosmos1/models/diffusion/config/config.py +3 -3
  34. cosmos1/models/diffusion/config/inference/cosmos-1-diffusion-text2world.py +1 -1
  35. cosmos1/models/diffusion/config/inference/cosmos-1-diffusion-video2world.py +2 -2
  36. cosmos1/models/diffusion/inference/text2world.py +4 -4
  37. cosmos1/models/diffusion/inference/video2world.py +4 -4
  38. cosmos1/models/diffusion/inference/world_generation_pipeline.py +5 -5
  39. cosmos1/models/diffusion/nemo/inference/general.py +1 -1
  40. cosmos1/models/diffusion/nemo/inference/inference_utils.py +3 -3
  41. cosmos1/models/diffusion/nemo/post_training/prepare_dataset.py +1 -1
  42. cosmos1/models/diffusion/networks/general_dit_video_conditioned.py +4 -4
  43. cosmos1/models/diffusion/prompt_upsampler/inference.py +3 -3
  44. cosmos1/models/diffusion/prompt_upsampler/text2world_prompt_upsampler_inference.py +3 -3
  45. cosmos1/models/diffusion/prompt_upsampler/video2world_prompt_upsampler_inference.py +4 -4
  46. df_conditioner.py +3 -3
  47. df_config_base_conditioner.py +3 -3
  48. df_config_base_model.py +1 -1
  49. df_config_base_net.py +3 -3
  50. df_config_base_tokenizer.py +2 -2
ar_config_base_model.py CHANGED
@@ -17,7 +17,7 @@ from typing import Optional

import attrs

- from AutoregressiveVideo2WorldGeneration.ar_config_base_tokenizer import TokenizerConfig
+ from .ar_config_base_tokenizer import TokenizerConfig


@attrs.define
ar_config_base_model_config.py CHANGED
@@ -16,17 +16,17 @@
import copy
from typing import Callable, List, Optional

- from AutoregressiveVideo2WorldGeneration.ar_config_base_model import ModelConfig
- from AutoregressiveVideo2WorldGeneration.ar_config_base_tokenizer import (
+ from .ar_config_base_model import ModelConfig
+ from .ar_config_base_tokenizer import (
    TextTokenizerConfig,
    TokenizerConfig,
    VideoTokenizerConfig,
    create_discrete_video_fsq_tokenizer_state_dict_config,
)
- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_image_text_tokenizer import ImageTextTokenizer
- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_text_tokenizer import TextTokenizer
- from AutoregressiveVideo2WorldGeneration import log
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyCall as L
+ from .ar_tokenizer_image_text_tokenizer import ImageTextTokenizer
+ from .ar_tokenizer_text_tokenizer import TextTokenizer
+ from .log import log
+ from .lazy_config_init import LazyCall as L

# Common architecture specifications
BASE_CONFIG = {"n_kv_heads": 8, "norm_type": "rmsnorm", "norm_eps": 1e-5, "ffn_hidden_size": 14336}
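Note that the log rewrite changes more than the prefix: the old `from AutoregressiveVideo2WorldGeneration import log` bound the log submodule itself, while the new `from .log import log` binds an attribute named log inside log.py. A sketch of a log.py that would satisfy the new spelling (hypothetical; the real module may expose log differently):

# log.py -- hypothetical sketch: a module-level `log` object makes
# `from .log import log` behave like the old module import for
# callers that only use log.info(...) / log.warning(...) style calls.
import logging

log = logging.getLogger("AutoregressiveVideo2WorldGeneration")
log.setLevel(logging.INFO)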
ar_config_base_tokenizer.py CHANGED
@@ -17,10 +17,10 @@ from typing import Optional

import attrs

- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_discrete_video import DiscreteVideoFSQStateDictTokenizer
- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_networks import CausalDiscreteVideoTokenizer
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyCall as L
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyDict
+ from .ar_tokenizer_discrete_video import DiscreteVideoFSQStateDictTokenizer
+ from .ar_tokenizer_networks import CausalDiscreteVideoTokenizer
+ from .lazy_config_init import LazyCall as L
+ from .lazy_config_init import LazyDict


def create_discrete_video_fsq_tokenizer_state_dict_config(
ar_config_inference_inference_config.py CHANGED
@@ -17,7 +17,7 @@ from typing import Any, List, Union

import attrs

- from AutoregressiveVideo2WorldGeneration.ar_config_base_model import ModelConfig, TokenizerConfig
+ from .ar_config_base_model import ModelConfig, TokenizerConfig


@attrs.define(slots=False)
ar_diffusion_decoder_config_base_conditioner.py CHANGED
@@ -18,8 +18,8 @@ from typing import Dict, Optional

import torch

- from AutoregressiveVideo2WorldGeneration.df_conditioner import BaseVideoCondition, GeneralConditioner
- from AutoregressiveVideo2WorldGeneration.df_config_base_conditioner import (
+ from .df_conditioner import BaseVideoCondition, GeneralConditioner
+ from .df_config_base_conditioner import (
    FPSConfig,
    ImageSizeConfig,
    LatentConditionConfig,
@@ -28,8 +28,8 @@ from AutoregressiveVideo2WorldGeneration.df_config_base_conditioner import (
    PaddingMaskConfig,
    TextConfig,
)
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyCall as L
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyDict
+ from .lazy_config_init import LazyCall as L
+ from .lazy_config_init import LazyDict


@dataclass
ar_diffusion_decoder_config_config_latent_diffusion_decoder.py CHANGED
@@ -17,11 +17,11 @@ from typing import Any, List

import attrs

- from AutoregressiveVideo2WorldGeneration.ar_diffusion_decoder_config_registry import register_configs as register_dd_configs
- from AutoregressiveVideo2WorldGeneration.df_config_base_model import LatentDiffusionDecoderModelConfig
- from AutoregressiveVideo2WorldGeneration.df_config_registry import register_configs
- from AutoregressiveVideo2WorldGeneration import config
- from AutoregressiveVideo2WorldGeneration.config_helper import import_all_modules_from_package
+ from .ar_diffusion_decoder_config_registry import register_configs as register_dd_configs
+ from .df_config_base_model import LatentDiffusionDecoderModelConfig
+ from .df_config_registry import register_configs
+ from .config import config
+ from .config_helper import import_all_modules_from_package


@attrs.define(slots=False)
ar_diffusion_decoder_config_inference_cosmos_diffusiondecoder_7b.py CHANGED
@@ -15,9 +15,9 @@

from hydra.core.config_store import ConfigStore

- from AutoregressiveVideo2WorldGeneration.ar_diffusion_decoder_network import DiffusionDecoderGeneralDIT
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyCall as L
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyDict
+ from .ar_diffusion_decoder_network import DiffusionDecoderGeneralDIT
+ from .lazy_config_init import LazyCall as L
+ from .lazy_config_init import LazyDict

num_frames = 57
Cosmos_DiffusionDecoder_7B_INFERENCE_ONLY: LazyDict = LazyDict(
ar_diffusion_decoder_config_registry.py CHANGED
@@ -15,12 +15,12 @@

from hydra.core.config_store import ConfigStore

- from AutoregressiveVideo2WorldGeneration.ar_diffusion_decoder_config_base_conditioner import (
+ from .ar_diffusion_decoder_config_base_conditioner import (
    VideoLatentDiffusionDecoderConditionerConfig,
)
- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_discrete_video import DiscreteVideoFSQJITTokenizer
- from AutoregressiveVideo2WorldGeneration.df_module_pretrained_vae import JITVAE, JointImageVideoSharedJITTokenizer, VideoJITTokenizer
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyCall as L
+ from .ar_tokenizer_discrete_video import DiscreteVideoFSQJITTokenizer
+ from .df_module_pretrained_vae import JITVAE, JointImageVideoSharedJITTokenizer, VideoJITTokenizer
+ from .lazy_config_init import LazyCall as L


def get_cosmos_video_discrete_tokenizer_comp8x16x16(
ar_diffusion_decoder_inference.py CHANGED
@@ -19,10 +19,10 @@ from typing import List

import torch

- from AutoregressiveVideo2WorldGeneration.ar_config_inference_inference_config import DiffusionDecoderSamplingConfig
- from AutoregressiveVideo2WorldGeneration.ar_diffusion_decoder_model import LatentDiffusionDecoderModel
- from AutoregressiveVideo2WorldGeneration.ar_diffusion_decoder_utils import linear_blend_video_list, split_with_overlap
- from AutoregressiveVideo2WorldGeneration import log
+ from .ar_config_inference_inference_config import DiffusionDecoderSamplingConfig
+ from .ar_diffusion_decoder_model import LatentDiffusionDecoderModel
+ from .ar_diffusion_decoder_utils import linear_blend_video_list, split_with_overlap
+ from .log import log


def diffusion_decoder_process_tokens(
ar_diffusion_decoder_model.py CHANGED
@@ -19,11 +19,11 @@ from typing import Callable, Dict, Optional, Tuple
import torch
from torch import Tensor

- from AutoregressiveVideo2WorldGeneration.df_conditioner import BaseVideoCondition
- from AutoregressiveVideo2WorldGeneration.df_df_functional_batch_ops import batch_mul
- from AutoregressiveVideo2WorldGeneration.df_df_module_res_sampler import COMMON_SOLVER_OPTIONS
- from AutoregressiveVideo2WorldGeneration.df_model_model_t2w import DiffusionT2WModel as VideoDiffusionModel
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import instantiate as lazy_instantiate
+ from .df_conditioner import BaseVideoCondition
+ from .df_df_functional_batch_ops import batch_mul
+ from .df_df_module_res_sampler import COMMON_SOLVER_OPTIONS
+ from .df_model_model_t2w import DiffusionT2WModel as VideoDiffusionModel
+ from .lazy_config_init import instantiate as lazy_instantiate


@dataclass
ar_diffusion_decoder_network.py CHANGED
@@ -20,8 +20,8 @@ from einops import rearrange
from torch import nn
from torchvision import transforms

- from AutoregressiveVideo2WorldGeneration.df_module_blocks import PatchEmbed
- from AutoregressiveVideo2WorldGeneration.df_network_general_dit import GeneralDIT
+ from .df_module_blocks import PatchEmbed
+ from .df_network_general_dit import GeneralDIT


class DiffusionDecoderGeneralDIT(GeneralDIT):
ar_model.py CHANGED
@@ -19,24 +19,24 @@ import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Set

- from AutoregressiveVideo2WorldGeneration import misc
+ from .misc import misc
import torch
from safetensors.torch import load_file
from torch.nn.modules.module import _IncompatibleKeys

- from AutoregressiveVideo2WorldGeneration.ar_config_base_model import ModelConfig
- from AutoregressiveVideo2WorldGeneration.ar_config_base_tokenizer import TokenizerConfig
- from AutoregressiveVideo2WorldGeneration.ar_module_mm_projector import MultimodalProjector
- from AutoregressiveVideo2WorldGeneration.ar_network_transformer import Transformer
- from AutoregressiveVideo2WorldGeneration.ar_network_vit import VisionTransformer, get_vit_config
- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_tokenizer import DiscreteMultimodalTokenizer, update_vocab_size
- from AutoregressiveVideo2WorldGeneration.ar_utils_checkpoint import (
+ from .ar_config_base_model import ModelConfig
+ from .ar_config_base_tokenizer import TokenizerConfig
+ from .ar_module_mm_projector import MultimodalProjector
+ from .ar_network_transformer import Transformer
+ from .ar_network_vit import VisionTransformer, get_vit_config
+ from .ar_tokenizer_tokenizer import DiscreteMultimodalTokenizer, update_vocab_size
+ from .ar_utils_checkpoint import (
    get_partial_state_dict,
    process_state_dict,
    substrings_to_ignore,
)
- from AutoregressiveVideo2WorldGeneration.ar_utils_sampling import decode_n_tokens, decode_one_token, prefill
- from AutoregressiveVideo2WorldGeneration import log
+ from .ar_utils_sampling import decode_n_tokens, decode_one_token, prefill
+ from .log import log


class AutoRegressiveModel(torch.nn.Module):
ar_module_attention.py CHANGED
@@ -19,8 +19,8 @@ from typing import Optional, Union
import torch
from torch import nn

- from AutoregressiveVideo2WorldGeneration.ar_module_embedding import RotaryPositionEmbedding
- from AutoregressiveVideo2WorldGeneration.ar_module_normalization import create_norm
+ from .ar_module_embedding import RotaryPositionEmbedding
+ from .ar_module_normalization import create_norm


class Attention(nn.Module):
ar_network_transformer.py CHANGED
@@ -19,17 +19,17 @@ import torch
import torch.nn as nn
from torch.nn.modules.module import _IncompatibleKeys

- from AutoregressiveVideo2WorldGeneration.ar_module_attention import Attention
- from AutoregressiveVideo2WorldGeneration.ar_module_embedding import (
+ from .ar_module_attention import Attention
+ from .ar_module_embedding import (
    RotaryPositionEmbeddingPytorchV1,
    RotaryPositionEmbeddingPytorchV2,
    SinCosPosEmbAxisTE,
)
- from AutoregressiveVideo2WorldGeneration.ar_module_mlp import MLP
- from AutoregressiveVideo2WorldGeneration.ar_module_normalization import create_norm
- from AutoregressiveVideo2WorldGeneration.ar_utils_checkpoint import process_state_dict, substrings_to_ignore
- from AutoregressiveVideo2WorldGeneration.ar_utils_misc import maybe_convert_to_namespace
- from AutoregressiveVideo2WorldGeneration import log
+ from .ar_module_mlp import MLP
+ from .ar_module_normalization import create_norm
+ from .ar_utils_checkpoint import process_state_dict, substrings_to_ignore
+ from .ar_utils_misc import maybe_convert_to_namespace
+ from .log import log


class TransformerBlock(nn.Module):
ar_network_vit.py CHANGED
@@ -26,9 +26,9 @@ from typing import Any, Callable, Mapping, Optional, Tuple
import torch
import torch.nn as nn

- from AutoregressiveVideo2WorldGeneration.ar_module_normalization import create_norm
- from AutoregressiveVideo2WorldGeneration.ar_network_transformer import TransformerBlock
- from AutoregressiveVideo2WorldGeneration import log
+ from .ar_module_normalization import create_norm
+ from .ar_network_transformer import TransformerBlock
+ from .log import log


def get_vit_config(model_name: str) -> Mapping[str, Any]:
ar_tokenizer_discrete_video.py CHANGED
@@ -18,7 +18,7 @@ from typing import Optional
import torch
from einops import rearrange

- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_quantizers import FSQuantizer
+ from .ar_tokenizer_quantizers import FSQuantizer

# Make sure jit model output consistenly during consecutive calls
# Check here: https://github.com/pytorch/pytorch/issues/74534
ar_tokenizer_image_text_tokenizer.py CHANGED
@@ -21,8 +21,8 @@ import transformers
from transformers import AutoImageProcessor
from transformers.image_utils import ImageInput, is_valid_image, load_image

- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_text_tokenizer import TextTokenizer
- from AutoregressiveVideo2WorldGeneration import log
+ from .ar_tokenizer_text_tokenizer import TextTokenizer
+ from .log import log

# Configuration for different vision-language models
IMAGE_CONFIGS = {
ar_tokenizer_modules.py CHANGED
@@ -29,8 +29,8 @@ import torch
import torch.nn as nn
import torch.nn.functional as F

- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_patching import Patcher3D, UnPatcher3D
- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_utils import (
+ from .ar_tokenizer_patching import Patcher3D, UnPatcher3D
+ from .ar_tokenizer_utils import (
    CausalNormalize,
    batch2space,
    batch2time,
@@ -41,7 +41,7 @@ from AutoregressiveVideo2WorldGeneration.ar_tokenizer_utils import (
    space2batch,
    time2batch,
)
- from AutoregressiveVideo2WorldGeneration import log
+ from .log import log


class CausalConv3d(nn.Module):
ar_tokenizer_networks.py CHANGED
@@ -18,9 +18,9 @@ from collections import namedtuple
import torch
from torch import nn

- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_modules import CausalConv3d, DecoderFactorized, EncoderFactorized
- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_quantizers import FSQuantizer
- from AutoregressiveVideo2WorldGeneration import log
+ from .ar_tokenizer_modules import CausalConv3d, DecoderFactorized, EncoderFactorized
+ from .ar_tokenizer_quantizers import FSQuantizer
+ from .log import log

NetworkEval = namedtuple("NetworkEval", ["reconstructions", "quant_loss", "quant_info"])

ar_tokenizer_quantizers.py CHANGED
@@ -21,7 +21,7 @@ import torch
import torch.nn as nn
from einops import rearrange

- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_utils import default, pack_one, round_ste, unpack_one
+ from .ar_tokenizer_utils import default, pack_one, round_ste, unpack_one


class FSQuantizer(nn.Module):
ar_tokenizer_text_tokenizer.py CHANGED
@@ -19,7 +19,7 @@ import numpy as np
import torch
from transformers import AutoTokenizer

- from AutoregressiveVideo2WorldGeneration import log
+ from .log import log


def get_tokenizer_path(model_family: str, is_instruct_model: bool = False):
ar_tokenizer_tokenizer.py CHANGED
@@ -19,8 +19,8 @@ from typing import Optional
import torch
from einops import rearrange

- from AutoregressiveVideo2WorldGeneration.ar_config_base_tokenizer import TokenizerConfig
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import instantiate as lazy_instantiate
+ from .ar_config_base_tokenizer import TokenizerConfig
+ from .lazy_config_init import instantiate as lazy_instantiate


def update_vocab_size(
ar_utils_inference.py CHANGED
@@ -25,8 +25,8 @@ import torch
import torchvision
from PIL import Image

- from AutoregressiveVideo2WorldGeneration.ar_config_inference_inference_config import SamplingConfig
- from AutoregressiveVideo2WorldGeneration import log
+ from .ar_config_inference_inference_config import SamplingConfig
+ from .log import log

_IMAGE_EXTENSIONS = [".png", ".jpg", ".jpeg", "webp"]
_VIDEO_EXTENSIONS = [".mp4"]
ar_utils_sampling.py CHANGED
@@ -17,7 +17,7 @@ from typing import Optional, Tuple

import torch

- from AutoregressiveVideo2WorldGeneration.ar_network_transformer import Transformer
+ from .ar_network_transformer import Transformer


def sample_top_p(logits, temperature, top_p, return_probs: bool = False):
base.py CHANGED
@@ -19,9 +19,9 @@ import os
import imageio
import torch

- from AutoregressiveVideo2WorldGeneration.world_generation_pipeline import ARBaseGenerationPipeline
- from AutoregressiveVideo2WorldGeneration.ar_utils_inference import add_common_arguments, load_vision_input, validate_args
- from AutoregressiveVideo2WorldGeneration import log
+ from .world_generation_pipeline import ARBaseGenerationPipeline
+ from .ar_utils_inference import add_common_arguments, load_vision_input, validate_args
+ from .log import log


def parse_args():
base_world_generation_pipeline.py CHANGED
@@ -21,8 +21,8 @@ from typing import Any
import numpy as np
import torch

- from AutoregressiveVideo2WorldGeneration.t5_text_encoder import CosmosT5TextEncoder
- from AutoregressiveVideo2WorldGeneration import guardrail_common_presets as guardrail_presets
+ from .t5_text_encoder import CosmosT5TextEncoder
+ from .guardrail_common_presets import guardrail_common_presets as guardrail_presets


class BaseWorldGenerationPipeline(ABC):
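The guardrail import just above is the attribute-style rewrite applied to what used to be a module binding. A sketch of the two possible relative spellings, for comparison; only the first is semantically identical to the old absolute import:

# Binds the submodule itself -- exact equivalent of the old
# `from AutoregressiveVideo2WorldGeneration import guardrail_common_presets as guardrail_presets`.
from . import guardrail_common_presets as guardrail_presets

# Form used in this commit: binds the attribute
# guardrail_common_presets.guardrail_common_presets, which exists only
# if the module defines or re-exports that name (an assumption about
# the module's contents, not verified here).
from .guardrail_common_presets import guardrail_common_presets as guardrail_presets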
config.py CHANGED
@@ -19,8 +19,8 @@ from typing import Any, TypeVar

import attrs

- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyDict
- from AutoregressiveVideo2WorldGeneration.misc import Color
+ from .lazy_config_init import LazyDict
+ from .misc import Color

T = TypeVar("T")

config_helper.py CHANGED
@@ -27,8 +27,8 @@ from hydra import compose, initialize
from hydra.core.config_store import ConfigStore
from omegaconf import DictConfig, OmegaConf

- from AutoregressiveVideo2WorldGeneration import log
- from AutoregressiveVideo2WorldGeneration.config import Config
+ from .log import log
+ from .config import Config


def is_attrs_or_dataclass(obj) -> bool:
cosmos1/models/autoregressive/nemo/cosmos.py CHANGED
@@ -29,7 +29,7 @@ from nemo.lightning import OptimizerModule, io
from nemo.lightning.base import teardown
from torch import Tensor, nn

- from AutoregressiveVideo2WorldGeneration import log
+ from .log import log


class RotaryEmbedding3D(RotaryEmbedding):
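For files nested under cosmos1/..., a single leading dot resolves against that file's own directory, not the repository root, so `from .log import log` here looks for a log.py next to cosmos.py. A sketch of the depth-matching spelling, assuming log.py actually lives at the package root (one extra dot per package level climbed):

# From cosmos1/models/autoregressive/nemo/cosmos.py, one dot is the
# `nemo` package; each additional dot climbs one level, so reaching a
# root-level log.py would take five dots (hypothetical depth; it
# depends on where log.py actually sits in this repository).
from .....log import log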
cosmos1/models/autoregressive/nemo/inference/general.py CHANGED
@@ -34,10 +34,10 @@ from nemo.lightning import io
from nemo.lightning.ckpt_utils import ckpt_to_context_subdir

from cosmos1.models.autoregressive.nemo.utils import run_diffusion_decoder_model
- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_discrete_video import DiscreteVideoFSQJITTokenizer
- from AutoregressiveVideo2WorldGeneration.ar_utils_inference import load_vision_input
+ from .ar_tokenizer_discrete_video import DiscreteVideoFSQJITTokenizer
+ from .ar_utils_inference import load_vision_input
from AutoregressiveVideo2WorldGeneration import guardrail_common_presets as guardrail_presets
- from AutoregressiveVideo2WorldGeneration import log
+ from .log import log

torch._C._jit_set_texpr_fuser_enabled(False)

cosmos1/models/autoregressive/nemo/post_training/prepare_dataset.py CHANGED
@@ -23,8 +23,8 @@ from huggingface_hub import snapshot_download
from nemo.collections.nlp.data.language_modeling.megatron import indexed_dataset

from cosmos1.models.autoregressive.nemo.utils import read_input_videos
- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_discrete_video import DiscreteVideoFSQJITTokenizer
- from AutoregressiveVideo2WorldGeneration import log
+ from .ar_tokenizer_discrete_video import DiscreteVideoFSQJITTokenizer
+ from .log import log

TOKENIZER_COMPRESSION_FACTOR = [8, 16, 16]
DATA_RESOLUTION_SUPPORTED = [640, 1024]
cosmos1/models/autoregressive/nemo/utils.py CHANGED
@@ -23,16 +23,16 @@ import torch
import torchvision
from huggingface_hub import snapshot_download

- from AutoregressiveVideo2WorldGeneration.ar_config_inference_inference_config import DiffusionDecoderSamplingConfig
- from AutoregressiveVideo2WorldGeneration.ar_diffusion_decoder_inference import diffusion_decoder_process_tokens
- from AutoregressiveVideo2WorldGeneration.ar_diffusion_decoder_model import LatentDiffusionDecoderModel
- from AutoregressiveVideo2WorldGeneration.df_inference_inference_utils import (
+ from .ar_config_inference_inference_config import DiffusionDecoderSamplingConfig
+ from .ar_diffusion_decoder_inference import diffusion_decoder_process_tokens
+ from .ar_diffusion_decoder_model import LatentDiffusionDecoderModel
+ from .df_inference_inference_utils import (
    load_network_model,
    load_tokenizer_model,
    skip_init_linear,
)
- from AutoregressiveVideo2WorldGeneration import log
- from AutoregressiveVideo2WorldGeneration.config_helper import get_config_module, override
+ from .log import log
+ from .config_helper import get_config_module, override

TOKENIZER_COMPRESSION_FACTOR = [8, 16, 16]
DATA_RESOLUTION_SUPPORTED = [640, 1024]
cosmos1/models/diffusion/config/config.py CHANGED
@@ -17,10 +17,10 @@ from typing import Any, List

import attrs

- from AutoregressiveVideo2WorldGeneration.df_config_base_model import DefaultModelConfig
- from AutoregressiveVideo2WorldGeneration.df_config_registry import register_configs
+ from .df_config_base_model import DefaultModelConfig
+ from .df_config_registry import register_configs
from AutoregressiveVideo2WorldGeneration import config
- from AutoregressiveVideo2WorldGeneration.config_helper import import_all_modules_from_package
+ from .config_helper import import_all_modules_from_package


@attrs.define(slots=False)
cosmos1/models/diffusion/config/inference/cosmos-1-diffusion-text2world.py CHANGED
@@ -15,7 +15,7 @@

from hydra.core.config_store import ConfigStore

- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyDict
+ from .lazy_config_init import LazyDict

Cosmos_1_0_Diffusion_Text2World_7B: LazyDict = LazyDict(
    dict(
cosmos1/models/diffusion/config/inference/cosmos-1-diffusion-video2world.py CHANGED
@@ -16,8 +16,8 @@
from hydra.core.config_store import ConfigStore

from cosmos1.models.diffusion.networks.general_dit_video_conditioned import VideoExtendGeneralDIT
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyCall as L
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyDict
+ from .lazy_config_init import LazyCall as L
+ from .lazy_config_init import LazyDict

Cosmos_1_0_Diffusion_Video2World_7B: LazyDict = LazyDict(
    dict(
cosmos1/models/diffusion/inference/text2world.py CHANGED
@@ -16,13 +16,13 @@
import argparse
import os

- from AutoregressiveVideo2WorldGeneration import misc
+ from .misc import misc
import torch

- from AutoregressiveVideo2WorldGeneration.df_inference_inference_utils import add_common_arguments, validate_args
+ from .df_inference_inference_utils import add_common_arguments, validate_args
from cosmos1.models.diffusion.inference.world_generation_pipeline import DiffusionText2WorldGenerationPipeline
- from AutoregressiveVideo2WorldGeneration import log
- from AutoregressiveVideo2WorldGeneration.io import read_prompts_from_file, save_video
+ from .log import log
+ from .io import read_prompts_from_file, save_video

torch.enable_grad(False)

cosmos1/models/diffusion/inference/video2world.py CHANGED
@@ -16,13 +16,13 @@
import argparse
import os

- from AutoregressiveVideo2WorldGeneration import misc
+ from .misc import misc
import torch

- from AutoregressiveVideo2WorldGeneration.df_inference_inference_utils import add_common_arguments, check_input_frames, validate_args
+ from .df_inference_inference_utils import add_common_arguments, check_input_frames, validate_args
from cosmos1.models.diffusion.inference.world_generation_pipeline import DiffusionVideo2WorldGenerationPipeline
- from AutoregressiveVideo2WorldGeneration import log
- from AutoregressiveVideo2WorldGeneration.io import read_prompts_from_file, save_video
+ from .log import log
+ from .io import read_prompts_from_file, save_video

torch.enable_grad(False)

cosmos1/models/diffusion/inference/world_generation_pipeline.py CHANGED
@@ -20,8 +20,8 @@ from typing import Any, Optional
import numpy as np
import torch

- from AutoregressiveVideo2WorldGeneration.base_world_generation_pipeline import BaseWorldGenerationPipeline
- from AutoregressiveVideo2WorldGeneration.df_inference_inference_utils import (
+ from .base_world_generation_pipeline import BaseWorldGenerationPipeline
+ from .df_inference_inference_utils import (
    generate_world_from_text,
    generate_world_from_video,
    get_condition_latent,
@@ -30,8 +30,8 @@ from AutoregressiveVideo2WorldGeneration.df_inference_inference_utils import (
    load_network_model,
    load_tokenizer_model,
)
- from AutoregressiveVideo2WorldGeneration.df_model_model_t2w import DiffusionT2WModel
- from AutoregressiveVideo2WorldGeneration.df_model_model_v2w import DiffusionV2WModel
+ from .df_model_model_t2w import DiffusionT2WModel
+ from .df_model_model_v2w import DiffusionV2WModel
from cosmos1.models.diffusion.prompt_upsampler.text2world_prompt_upsampler_inference import (
    create_prompt_upsampler,
    run_chat_completion,
@@ -43,7 +43,7 @@ from cosmos1.models.diffusion.prompt_upsampler.video2world_prompt_upsampler_inference import (
from cosmos1.models.diffusion.prompt_upsampler.video2world_prompt_upsampler_inference import (
    run_chat_completion as run_chat_completion_vlm,
)
- from AutoregressiveVideo2WorldGeneration import log
+ from .log import log

MODEL_NAME_DICT = {
    "Cosmos-1.0-Diffusion-7B-Text2World": "Cosmos_1_0_Diffusion_Text2World_7B",
cosmos1/models/diffusion/nemo/inference/general.py CHANGED
@@ -37,7 +37,7 @@ from nemo.collections.diffusion.sampler.cosmos.cosmos_diffusion_pipeline import
from transformers import T5EncoderModel, T5TokenizerFast

from cosmos1.models.diffusion.nemo.inference.inference_utils import process_prompt, save_video
- from AutoregressiveVideo2WorldGeneration import log
+ from .log import log

EXAMPLE_PROMPT = (
    "The teal robot is cooking food in a kitchen. Steam rises from a simmering pot "
cosmos1/models/diffusion/nemo/inference/inference_utils.py CHANGED
@@ -19,18 +19,18 @@ import imageio
import numpy as np
import torch

- from AutoregressiveVideo2WorldGeneration.ar_model import AutoRegressiveModel
+ from .ar_model import AutoRegressiveModel
from cosmos1.models.diffusion.prompt_upsampler.text2world_prompt_upsampler_inference import (
    create_prompt_upsampler,
    run_chat_completion,
)
- from AutoregressiveVideo2WorldGeneration.guardrail_common_presets import (
+ from .guardrail_common_presets import (
    create_text_guardrail_runner,
    create_video_guardrail_runner,
    run_text_guardrail,
    run_video_guardrail,
)
- from AutoregressiveVideo2WorldGeneration import log
+ from .log import log


def get_upsampled_prompt(
cosmos1/models/diffusion/nemo/post_training/prepare_dataset.py CHANGED
@@ -27,7 +27,7 @@ from nemo.collections.diffusion.models.model import DiT7BConfig
from tqdm import tqdm
from transformers import T5EncoderModel, T5TokenizerFast

- from AutoregressiveVideo2WorldGeneration import log
+ from .log import log


def get_parser():
cosmos1/models/diffusion/networks/general_dit_video_conditioned.py CHANGED
@@ -19,10 +19,10 @@ import torch
from einops import rearrange
from torch import nn

- from AutoregressiveVideo2WorldGeneration.df_conditioner import DataType
- from AutoregressiveVideo2WorldGeneration.df_module_blocks import TimestepEmbedding, Timesteps
- from AutoregressiveVideo2WorldGeneration.df_network_general_dit import GeneralDIT
- from AutoregressiveVideo2WorldGeneration import log
+ from .df_conditioner import DataType
+ from .df_module_blocks import TimestepEmbedding, Timesteps
+ from .df_network_general_dit import GeneralDIT
+ from .log import log


class VideoExtendGeneralDIT(GeneralDIT):
cosmos1/models/diffusion/prompt_upsampler/inference.py CHANGED
@@ -17,9 +17,9 @@ from typing import List, Optional, TypedDict

import torch

- from AutoregressiveVideo2WorldGeneration.ar_model import AutoRegressiveModel
- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_image_text_tokenizer import ImageTextTokenizer
- from AutoregressiveVideo2WorldGeneration.ar_tokenizer_text_tokenizer import TextTokenizer
+ from .ar_model import AutoRegressiveModel
+ from .ar_tokenizer_image_text_tokenizer import ImageTextTokenizer
+ from .ar_tokenizer_text_tokenizer import TextTokenizer


class ChatPrediction(TypedDict, total=False):
cosmos1/models/diffusion/prompt_upsampler/text2world_prompt_upsampler_inference.py CHANGED
@@ -23,11 +23,11 @@ import argparse
import os
import re

- from AutoregressiveVideo2WorldGeneration.ar_config_base_model_config import create_text_model_config
- from AutoregressiveVideo2WorldGeneration.ar_model import AutoRegressiveModel
+ from .ar_config_base_model_config import create_text_model_config
+ from .ar_model import AutoRegressiveModel
from cosmos1.models.diffusion.prompt_upsampler.inference import chat_completion
from AutoregressiveVideo2WorldGeneration import guardrail_common_presets as guardrail_presets
- from AutoregressiveVideo2WorldGeneration import log
+ from .log import log


def create_prompt_upsampler(checkpoint_dir: str) -> AutoRegressiveModel:
cosmos1/models/diffusion/prompt_upsampler/video2world_prompt_upsampler_inference.py CHANGED
@@ -26,12 +26,12 @@ from math import ceil

from PIL import Image

- from AutoregressiveVideo2WorldGeneration.ar_config_base_model_config import create_vision_language_model_config
- from AutoregressiveVideo2WorldGeneration.ar_model import AutoRegressiveModel
+ from .ar_config_base_model_config import create_vision_language_model_config
+ from .ar_model import AutoRegressiveModel
from cosmos1.models.diffusion.prompt_upsampler.inference import chat_completion
from AutoregressiveVideo2WorldGeneration import guardrail_common_presets as guardrail_presets
- from AutoregressiveVideo2WorldGeneration import log
- from AutoregressiveVideo2WorldGeneration.io import load_from_fileobj
+ from .log import log
+ from .io import load_from_fileobj


def create_vlm_prompt_upsampler(
df_conditioner.py CHANGED
@@ -23,9 +23,9 @@ from typing import Any, Dict, List, Optional, Tuple, Union
import torch
import torch.nn as nn

- from AutoregressiveVideo2WorldGeneration.df_df_functional_batch_ops import batch_mul
- from AutoregressiveVideo2WorldGeneration import log
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import instantiate
+ from .df_df_functional_batch_ops import batch_mul
+ from .log import log
+ from .lazy_config_init import instantiate


class BaseConditionEntry(nn.Module):
df_config_base_conditioner.py CHANGED
@@ -18,9 +18,9 @@ from typing import Dict, List, Optional
import attrs
import torch

- from AutoregressiveVideo2WorldGeneration.df_conditioner import BaseConditionEntry, TextAttr, VideoConditioner, VideoExtendConditioner
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyCall as L
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyDict
+ from .df_conditioner import BaseConditionEntry, TextAttr, VideoConditioner, VideoExtendConditioner
+ from .lazy_config_init import LazyCall as L
+ from .lazy_config_init import LazyDict


@attrs.define(slots=False)
df_config_base_model.py CHANGED
@@ -17,7 +17,7 @@ from typing import List

import attrs

- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyDict
+ from .lazy_config_init import LazyDict


@attrs.define(slots=False)
df_config_base_net.py CHANGED
@@ -15,9 +15,9 @@

import copy

- from AutoregressiveVideo2WorldGeneration.df_network_general_dit import GeneralDIT
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyCall as L
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyDict
+ from .df_network_general_dit import GeneralDIT
+ from .lazy_config_init import LazyCall as L
+ from .lazy_config_init import LazyDict

FADITV2Config: LazyDict = L(GeneralDIT)(
    max_img_h=240,
df_config_base_tokenizer.py CHANGED
@@ -15,8 +15,8 @@

import omegaconf

- from AutoregressiveVideo2WorldGeneration.df_module_pretrained_vae import JITVAE, JointImageVideoSharedJITTokenizer, VideoJITTokenizer
- from AutoregressiveVideo2WorldGeneration.lazy_config_init import LazyCall as L
+ from .df_module_pretrained_vae import JITVAE, JointImageVideoSharedJITTokenizer, VideoJITTokenizer
+ from .lazy_config_init import LazyCall as L

TOKENIZER_OPTIONS = {}