Kc-12 committed
Commit 429022a
Parent(s): b4eb45f

Upload 2 files

Files changed (2):
  1. app.py +11 -6
  2. better_transformer.py +9 -7
app.py CHANGED

@@ -1,13 +1,15 @@
 import streamlit as st
 import time
+import torch
 
 from better_transformer import *
+torch.set_default_device(torch.device("cuda"))
 
 def main():
 
     # Enable CUDA if available and load in tokenizer
-    device = torch.device('cuda') #if torch.cuda.is_available() else 'cpu')
-    tokenizer, EMPTY_TOKENS = load_tokenizer('cuda')
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    tokenizer, EMPTY_TOKENS = load_tokenizer(device)
 
     st.title("Scaling Transformers")
     st.subheader("UCLA DSU Project, Fall 2023")
@@ -37,7 +39,7 @@ def main():
     """
     )
     ## Default values for advanced settings
-    user_seed = 27 # Remove (set to None) if we're not rigging the "random" demo
+    user_seed = 27 # Remove if we're not rigging the "random" demo
     generation_method = "top-k"
     specified_k = 5
     specified_nucleus = 0.5
@@ -59,14 +61,17 @@ def main():
 
     max_tokens = st.slider('Max Tokens Generated:', 100, 500, 400)
 
-
+    ## Settings Clean up
+    if not user_seed:
+        user_seed = 7
 
 
 
 
     # model_version = st.radio("Which model would you like to use?", ["smoll", "beeg"])
     # small_model = load_casey_model(tokenizer, device)
-    model = load_big_model(tokenizer, 'cuda')
+    model = load_big_model(tokenizer, device)
+    model.to('cuda')
 
 
 
@@ -83,7 +88,7 @@ def main():
 
 
     with st.spinner(""):
-        result = generate(model, tokenizer, 'cuda', method=generation_method, k=specified_k,
+        result = generate(model, tokenizer, device, method=generation_method, k=specified_k,
                           p_nucleus=specified_nucleus, temp=specified_temperature, max_new_tokens=max_tokens,
                           cond=user_input, deterministic=user_seed)
 
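Note on the device handling in the first hunk: the rewritten `device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')` restores the CPU fallback, but the module-level `torch.set_default_device(torch.device("cuda"))` added two lines above still assumes a GPU, so tensor creation will fail on a CPU-only host anyway. A minimal device-agnostic sketch (a suggestion, not the committed code; assumes PyTorch 2.0+, where torch.set_default_device is available):

    import torch

    # Pick the device once, then make it the global default, preserving the
    # CPU fallback that the is_available() check is meant to provide.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.set_default_device(device)

    x = torch.zeros(1)                   # created on `device` automatically
    assert x.device.type == device.type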
 
 
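Similarly, `load_big_model` now receives `device`, but the `model.to('cuda')` added on the next line pins the weights to CUDA regardless of what `device` resolved to. A hedged alternative that keeps the fallback intact (`load_big_model` and `tokenizer` as defined in this commit):

    model = load_big_model(tokenizer, device)
    model.to(device)   # no-op on a CUDA host; keeps CPU-only machines working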
 
better_transformer.py CHANGED

@@ -9,8 +9,10 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from transformers import AutoTokenizer
+from torchinfo import summary
 
 MODEL_FILE = r'bt_8_LAYERs_100_DATA_PCT_768_EMBD_DIM_epoch_10.pt' ##place model file in same directory as app.py
+torch.set_default_device(torch.device("cuda"))
 
 # Better Transformer Class –––––––––––––––––––––––––––––––––––––––––––––––
 
@@ -139,7 +141,7 @@ class BetterTransformer(nn.Module):
         self.seq_length = seq_length
         self.pad_idx = pad_idx
         self.eos_token_id = eos_token_id
-        self.device = 'cuda'
+        self.device = device
         self.init_params()
 
     # optional weight initialization (e.g. Xavier uniform)
@@ -294,7 +296,7 @@ def load_tokenizer(device):
     tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
     if tokenizer.pad_token is None:
         tokenizer.add_special_tokens({'pad_token': '[PAD]'})
-    EMPTY_TOKENS = torch.full((1,1), tokenizer.bos_token_id, dtype=torch.long).to('cuda')
+    EMPTY_TOKENS = torch.full((1,1), tokenizer.bos_token_id, dtype=torch.long).to(device)
     return tokenizer, EMPTY_TOKENS
 
 
@@ -307,10 +309,10 @@ def load_big_model(tokenizer, device):
     VOCAB_SIZE = 50258
     SEQ_LENGTH = 384
 
-    model = BetterTransformer(VOCAB_SIZE, SEQ_LENGTH, N_EMBD, N_HEAD, N_LAYER, tokenizer.pad_token_id, tokenizer.eos_token_id, device='cuda')
+    model = BetterTransformer(VOCAB_SIZE, SEQ_LENGTH, N_EMBD, N_HEAD, N_LAYER, tokenizer.pad_token_id, tokenizer.eos_token_id, device=device)
     model.init_params()
     path = MODEL_FILE
-    model.load_state_dict(torch.load(path, map_location='cuda')["model_state_dict"])
+    model.load_state_dict(torch.load(path, map_location=device)["model_state_dict"])
 
     return model
 
@@ -360,9 +362,9 @@ def generate(model, tokenizer, device, method=None, k=None,
 
     if cond != "":
 
-        cond_tokens = tokenizer(cond).input_ids
+        cond_tokens = tokenizer(cond).input_ids.to(device) ## HERE???
 
-        gen_tokens = model.generate(torch.tensor(cond_tokens).unsqueeze(0).long().to('cuda'),
+        gen_tokens = model.generate(torch.tensor(cond_tokens).unsqueeze(0).long().to(device),
                                     method=method, k=k, p_nucleus=p_nucleus, temp=temp,
                                     max_new_tokens=max_new_tokens)[0]
 
@@ -378,7 +380,7 @@ def generate(model, tokenizer, device, method=None, k=None,
 
 
     else:
-        empty_tokens = torch.full((1,1), tokenizer.bos_token_id, dtype=torch.long).to('cuda')
+        empty_tokens = torch.full((1,1), tokenizer.bos_token_id, dtype=torch.long).to(device)
 
         res = tokenizer.batch_decode(model.generate(empty_tokens,
                                                     method=method, k=k,
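The switch to `map_location=device` in `load_big_model` is what makes the checkpoint portable: a state dict saved from a CUDA run is remapped to the selected device at load time instead of erroring out during deserialization on a GPU-less machine. A standalone sketch of the pattern (file name and dictionary key taken from this diff):

    import torch

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Remap every tensor storage in the checkpoint to `device` while loading.
    checkpoint = torch.load(r'bt_8_LAYERs_100_DATA_PCT_768_EMBD_DIM_epoch_10.pt',
                            map_location=device)
    state_dict = checkpoint["model_state_dict"]  # key used by load_big_model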
 
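One probable bug survives the cleanup, and the `## HERE???` comment suggests the author suspected as much: without `return_tensors`, `tokenizer(cond).input_ids` is a plain Python list, so the newly added `.to(device)` will raise AttributeError. Two hedged alternatives, reusing the names from the `generate` hunk above:

    # (a) Ask the tokenizer for a tensor up front; return_tensors='pt' already
    #     yields shape (1, seq_len), so the unsqueeze(0) becomes unnecessary.
    cond_tokens = tokenizer(cond, return_tensors='pt').input_ids.to(device)
    gen_tokens = model.generate(cond_tokens,
                                method=method, k=k, p_nucleus=p_nucleus, temp=temp,
                                max_new_tokens=max_new_tokens)[0]

    # (b) Keep the list and let the existing torch.tensor(...) call do the move.
    cond_tokens = tokenizer(cond).input_ids
    gen_tokens = model.generate(torch.tensor(cond_tokens).unsqueeze(0).long().to(device),
                                method=method, k=k, p_nucleus=p_nucleus, temp=temp,
                                max_new_tokens=max_new_tokens)[0]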