rynmurdock commited on
Commit
8ed9e1d
1 Parent(s): 35ef920

CPU only for now

Browse files
Files changed (3) hide show
  1. app.py +13 -13
  2. real_im_emb_plot.jpg +0 -0
  3. requirements.txt +1 -0
app.py CHANGED
@@ -3,10 +3,10 @@
3
 
4
  Automatically generated by Colab.
5
 
6
- Original file is located at
7
- https://colab.research.google.com/drive/1I47sLakpuwERGzn-XoNct67mwiDS1mQD
8
  """
9
 
 
 
10
  import matplotlib.pyplot as plt
11
  import matplotlib
12
 
@@ -108,7 +108,7 @@ def latent_code_from_text(text,):# args):
108
  coded1 =torch.Tensor.long(coded1)
109
  with torch.no_grad():
110
  x0 = coded1
111
- x0 = x0.to('cuda')
112
  pooled_hidden_fea = model_vae.encoder(x0, attention_mask=(x0 > 0).float())[1]
113
  mean, logvar = model_vae.encoder.linear(pooled_hidden_fea).chunk(2, -1)
114
  latent_z = mean.squeeze(1)
@@ -128,9 +128,9 @@ def text_from_latent_code(latent_z):
128
  length= length, # Chunyuan: Fix length; or use <EOS> to complete a sentence
129
  temperature=.5,
130
  top_k=100,
131
- top_p=.95,
132
- device='cuda',
133
- decoder_tokenizer = tokenizer_decoder
134
  )
135
  text_x1 = tokenizer_decoder.decode(out[0,:].tolist(), clean_up_tokenization_spaces=True)
136
  text_x1 = text_x1.split()[1:-1]
@@ -158,7 +158,7 @@ encoder_config_class, encoder_model_class, encoder_tokenizer_class = MODEL_CLASS
158
  model_encoder = encoder_model_class.from_pretrained(encoder_path, latent_size=latent_size)
159
  tokenizer_encoder = encoder_tokenizer_class.from_pretrained('bert-base-cased', do_lower_case=True)
160
 
161
- model_encoder.to('cuda')
162
  if block_size <= 0:
163
  block_size = tokenizer_encoder.max_len_single_sentence # Our input block size will be the max possible for the model
164
  block_size = min(block_size, tokenizer_encoder.max_len_single_sentence)
@@ -167,7 +167,7 @@ block_size = min(block_size, tokenizer_encoder.max_len_single_sentence)
167
  decoder_config_class, decoder_model_class, decoder_tokenizer_class = MODEL_CLASSES['gpt2']
168
  model_decoder = decoder_model_class.from_pretrained(decoder_path, latent_size=latent_size)
169
  tokenizer_decoder = decoder_tokenizer_class.from_pretrained('gpt2', do_lower_case=False)
170
- model_decoder.to('cuda')
171
  if block_size <= 0:
172
  block_size = tokenizer_decoder.max_len_single_sentence # Our input block size will be the max possible for the model
173
  block_size = min(block_size, tokenizer_decoder.max_len_single_sentence)
@@ -185,10 +185,10 @@ assert tokenizer_decoder.pad_token == '<PAD>'
185
 
186
 
187
  # Evaluation
188
- model_vae = VAE(model_encoder, model_decoder, tokenizer_encoder, tokenizer_decoder, SimpleNamespace(**{'latent_size': latent_size, 'device':'cuda'}))
189
  model_vae.load_state_dict(checkpoint['model_state_dict'])
190
  print("Pre-trained Optimus is successfully loaded")
191
- model_vae.to('cuda').to(torch.bfloat16)
192
  model_vae = torch.compile(model_vae)
193
 
194
  l = latent_code_from_text('A photo of a mountain.')[0]
@@ -222,17 +222,17 @@ def generate(prompt, in_embs=None,):
222
  if prompt != '':
223
  print(prompt)
224
  in_embs = in_embs / in_embs.abs().max() * .6 if in_embs != None else None
225
- in_embs = 1 * in_embs.to('cuda') + 1 * latent_code_from_text(prompt)[0] if in_embs != None else latent_code_from_text(prompt)[0]
226
  else:
227
  print('From embeds.')
228
  in_embs = in_embs / in_embs.abs().max() * .6
229
- in_embs = in_embs.to('cuda').to(torch.bfloat16)
230
  plt.close('all')
231
  plt.hist(np.array(in_embs.detach().to('cpu').to(torch.float)).flatten(), bins=5)
232
  plt.savefig('real_im_emb_plot.jpg')
233
 
234
 
235
- text = text_from_latent_code(in_embs).replace('<unk> ', '')
236
  in_embs = latent_code_from_text(text)[0]
237
  print(text)
238
  return text, in_embs.to('cpu')
 
3
 
4
  Automatically generated by Colab.
5
 
 
 
6
  """
7
 
8
+ DEVICE = 'cpu'
9
+
10
  import matplotlib.pyplot as plt
11
  import matplotlib
12
 
 
108
  coded1 =torch.Tensor.long(coded1)
109
  with torch.no_grad():
110
  x0 = coded1
111
+ x0 = x0.to(DEVICE)
112
  pooled_hidden_fea = model_vae.encoder(x0, attention_mask=(x0 > 0).float())[1]
113
  mean, logvar = model_vae.encoder.linear(pooled_hidden_fea).chunk(2, -1)
114
  latent_z = mean.squeeze(1)
 
128
  length= length, # Chunyuan: Fix length; or use <EOS> to complete a sentence
129
  temperature=.5,
130
  top_k=100,
131
+ top_p=.98,
132
+ device=DEVICE,
133
+ decoder_tokenizer=tokenizer_decoder
134
  )
135
  text_x1 = tokenizer_decoder.decode(out[0,:].tolist(), clean_up_tokenization_spaces=True)
136
  text_x1 = text_x1.split()[1:-1]
 
158
  model_encoder = encoder_model_class.from_pretrained(encoder_path, latent_size=latent_size)
159
  tokenizer_encoder = encoder_tokenizer_class.from_pretrained('bert-base-cased', do_lower_case=True)
160
 
161
+ model_encoder.to(DEVICE)
162
  if block_size <= 0:
163
  block_size = tokenizer_encoder.max_len_single_sentence # Our input block size will be the max possible for the model
164
  block_size = min(block_size, tokenizer_encoder.max_len_single_sentence)
 
167
  decoder_config_class, decoder_model_class, decoder_tokenizer_class = MODEL_CLASSES['gpt2']
168
  model_decoder = decoder_model_class.from_pretrained(decoder_path, latent_size=latent_size)
169
  tokenizer_decoder = decoder_tokenizer_class.from_pretrained('gpt2', do_lower_case=False)
170
+ model_decoder.to(DEVICE)
171
  if block_size <= 0:
172
  block_size = tokenizer_decoder.max_len_single_sentence # Our input block size will be the max possible for the model
173
  block_size = min(block_size, tokenizer_decoder.max_len_single_sentence)
 
185
 
186
 
187
  # Evaluation
188
+ model_vae = VAE(model_encoder, model_decoder, tokenizer_encoder, tokenizer_decoder, SimpleNamespace(**{'latent_size': latent_size, 'device':DEVICE}))
189
  model_vae.load_state_dict(checkpoint['model_state_dict'])
190
  print("Pre-trained Optimus is successfully loaded")
191
+ model_vae.to(DEVICE).to(torch.bfloat16)
192
  model_vae = torch.compile(model_vae)
193
 
194
  l = latent_code_from_text('A photo of a mountain.')[0]
 
222
  if prompt != '':
223
  print(prompt)
224
  in_embs = in_embs / in_embs.abs().max() * .6 if in_embs != None else None
225
+ in_embs = 1 * in_embs.to(DEVICE) + 1 * latent_code_from_text(prompt)[0] if in_embs != None else latent_code_from_text(prompt)[0]
226
  else:
227
  print('From embeds.')
228
  in_embs = in_embs / in_embs.abs().max() * .6
229
+ in_embs = in_embs.to(DEVICE).to(torch.bfloat16)
230
  plt.close('all')
231
  plt.hist(np.array(in_embs.detach().to('cpu').to(torch.float)).flatten(), bins=5)
232
  plt.savefig('real_im_emb_plot.jpg')
233
 
234
 
235
+ text = ' '.join(text_from_latent_code(in_embs).replace( '<unk>', '').split())
236
  in_embs = latent_code_from_text(text)[0]
237
  print(text)
238
  return text, in_embs.to('cpu')
real_im_emb_plot.jpg CHANGED
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  gradio
 
2
  numpy
3
  scikit-learn
4
  pandas
 
1
  gradio
2
+ boto3
3
  numpy
4
  scikit-learn
5
  pandas