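# Utilities for building a "celeb basis" from CLIP text embeddings: a patched
# forward pass for CLIP's text-embedding layer, plus a helper that collects
# celebrity-name token embeddings and returns their per-position mean and std.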
import torch

def embedding_forward(
    self,
    input_ids=None,
    position_ids=None,
    name_batch=None,
    inputs_embeds=None,
    embedding_manager=None,
    only_embedding=True,
    random_embeddings=None,
    timesteps=None,
):
    """Patched forward for CLIPTextEmbeddings.

    Meant to be bound onto `text_encoder.text_model.embeddings` so callers
    can fetch raw token embeddings (`only_embedding=True`) or route them
    through an external `embedding_manager` before position embeddings are
    added. Returns a tensor in the first case and an
    (embeddings, other_return_dict) tuple otherwise.
    """
    seq_length = input_ids.shape[-1] if input_ids is not None else inputs_embeds.shape[-2]

    if inputs_embeds is None:
        inputs_embeds = self.token_embedding(input_ids)
        # Early exit: raw token embeddings, no position information added.
        if only_embedding:
            return inputs_embeds

    # Initialize so the return below cannot raise a NameError when no
    # embedding manager is supplied.
    other_return_dict = None
    if embedding_manager is not None:
        inputs_embeds, other_return_dict = embedding_manager(
            input_ids, inputs_embeds, name_batch, random_embeddings, timesteps
        )

    if position_ids is None:
        position_ids = self.position_ids[:, :seq_length]

    position_embeddings = self.position_embedding(position_ids)
    embeddings = inputs_embeds + position_embeddings

    return embeddings, other_return_dict


@torch.no_grad()
def _get_celeb_embeddings_basis(tokenizer, text_encoder, good_names_txt):
    """Compute the per-position mean and std of celebrity-name embeddings.

    `good_names_txt` lists one celebrity name per line; each name is assumed
    to tokenize to exactly two tokens (first + last name), which is why only
    positions 1:3 (right after BOS) are kept.
    """
    device = text_encoder.device
    max_length = 77  # CLIP's fixed text context length

    with open(good_names_txt, "r") as f:
        celeb_names = f.read().splitlines()

    # Embed the two name tokens of every name.
    all_embeddings = []
    for name in celeb_names:
        batch_encoding = tokenizer(
            name, truncation=True, max_length=max_length, return_tensors="pt"
        )
        # Drop BOS, keep the two name tokens.
        tokens = batch_encoding["input_ids"].to(device)[:, 1:3]
        # Relies on the patched embedding_forward above: only_embedding=True
        # returns raw token embeddings with no position embeddings added.
        embeddings = text_encoder.text_model.embeddings(input_ids=tokens, only_embedding=True)
        all_embeddings.append(embeddings)

    all_embeddings: torch.Tensor = torch.cat(all_embeddings, dim=0)

    print('[all_embeddings loaded] shape =', all_embeddings.shape,
          'max:', all_embeddings.max().item(),
          'min:', all_embeddings.min().item())

    # Per-position statistics over all names form the embedding basis.
    name_emb_mean = all_embeddings.mean(0)
    name_emb_std = all_embeddings.std(0)

    print('[name_emb_mean loaded] shape =', name_emb_mean.shape,
          'max:', name_emb_mean.max().item(),
          'min:', name_emb_mean.min().item())

    return name_emb_mean, name_emb_std
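

if __name__ == "__main__":
    # Minimal usage sketch (not part of the original file): bind the patched
    # embedding_forward onto a HuggingFace CLIP text model, then build the
    # name-embedding statistics. The model id and "good_names.txt" (one
    # celebrity name per line) are assumptions; adjust them for your setup.
    from transformers import CLIPTokenizer, CLIPTextModel

    version = "openai/clip-vit-large-patch14"
    tokenizer = CLIPTokenizer.from_pretrained(version)
    text_encoder = CLIPTextModel.from_pretrained(version)

    # Bind embedding_forward as a method of the embeddings module so the
    # extra keyword arguments (only_embedding, embedding_manager, ...) are
    # accepted when the module is called.
    embeddings_module = text_encoder.text_model.embeddings
    embeddings_module.forward = embedding_forward.__get__(embeddings_module)

    name_emb_mean, name_emb_std = _get_celeb_embeddings_basis(
        tokenizer, text_encoder, "good_names.txt"
    )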