Update clipGPT.py

clipGPT.py CHANGED (+16 -6)
@@ -7,6 +7,7 @@ import torch
 import torch.nn as nn
 from torch.utils.data import Dataset, DataLoader
 from torch.nn import functional as F
+from typing import Tuple
 
 import pandas as pd
 import numpy as np
@@ -16,19 +17,30 @@ import nltk
 nltk.download('punkt')
 
 
+class Adapter(nn.Module):
+    def forward(self, x):
+        return self.model(x)
+
+    def __init__(self, sizes: Tuple[int, ...], bias=True, act=nn.Tanh):
+        super(Adapter, self).__init__()
+        layers = []
+        for i in range(len(sizes) - 1):
+            layers.append(nn.Linear(sizes[i], sizes[i + 1], bias=bias))
+            if i < len(sizes) - 2:
+                layers.append(act())
+        self.model = nn.Sequential(*layers)
+
+
+
 class ClipGPT2Model(nn.Module):
     def __init__(self, img_feature_length, img_feature_size = 512):
         super(ClipGPT2Model, self).__init__()
-        torch.cuda.empty_cache()
-        gc.collect()
         self.img_feature_length = img_feature_length
-
         self.gpt = GPT2LMHeadModel.from_pretrained('gpt2')
         self.gpt_embedding_size = self.gpt.transformer.wte.weight.shape[1]
         self.clip_project = Adapter((img_feature_size,
                                      (self.gpt_embedding_size * img_feature_length) // 2,
                                      self.gpt_embedding_size * img_feature_length))
-        torch.cuda.empty_cache()
     def get_dummy_token(self,
                         batch_size: int,
                         device: torch.device) -> torch.Tensor:
@@ -39,8 +51,6 @@ class ClipGPT2Model(nn.Module):
                 feature: torch.Tensor,
                 mask = None,
                 labels = None):
-        torch.cuda.empty_cache()
-        gc.collect()
 
         embedding_text = self.gpt.transformer.wte(tokens)
         feature_projections = self.clip_project(feature).view(-1, self.img_feature_length, self.gpt_embedding_size)