ierhon committed on
Commit 76b74a3
1 Parent(s): ba1fa51

Improving generalization on small datasets

Files changed (1)
  1. app.py +7 -5
app.py CHANGED
@@ -8,6 +8,7 @@ from keras_self_attention import SeqSelfAttention, SeqWeightedAttention
 
 emb_size = 128
 inp_len = 16
+maxshift = 4
 
 def train(data: str, message: str):
     if "→" not in data or "\n" not in data:
@@ -34,11 +35,12 @@ def train(data: str, message: str):
     y = []
 
     for key in dset:
-        tokens = tokenizer.texts_to_sequences([key,])[0]
-        X.append(np.array((list(tokens)+[0,]*inp_len)[:inp_len]))
-        output_array = np.zeros(resps_len)
-        output_array[dset[key]] = 1
-        y.append(output_array)
+        for p in range(maxshift):
+            tokens = tokenizer.texts_to_sequences([key,])[0]
+            X.append(np.array(([0,]*p+list(tokens)+[0,]*inp_len)[:inp_len]))
+            output_array = np.zeros(resps_len)
+            output_array[dset[key]] = 1
+            y.append(output_array)
 
     X = np.array(X)
     y = np.array(y)
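The change augments each training phrase with maxshift left-shifted copies, so the classifier sees the same phrase at several start positions instead of memorizing absolute token positions. Below is a minimal, self-contained sketch of that augmentation loop; the toy dataset, resps_len, and the Tokenizer setup are hypothetical stand-ins for what app.py builds earlier in train(), and only the shift/padding logic mirrors the diff above.

```python
# Sketch of the shift-based augmentation from the commit.
# The dataset and label mapping here are made-up examples.
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer

inp_len = 16    # fixed model input length
maxshift = 4    # number of shifted copies per phrase

# Hypothetical toy dataset: phrase -> response index
dset = {"hello there": 0, "how are you": 1}
resps_len = 2

tokenizer = Tokenizer()
tokenizer.fit_on_texts(list(dset.keys()))

X, y = [], []
for key in dset:
    tokens = tokenizer.texts_to_sequences([key])[0]
    for p in range(maxshift):
        # Prepend p padding tokens (id 0), then pad/truncate to inp_len.
        # Each phrase yields maxshift rows, which both enlarges a small
        # dataset and makes the model tolerant to position shifts.
        X.append(np.array(([0] * p + list(tokens) + [0] * inp_len)[:inp_len]))
        one_hot = np.zeros(resps_len)
        one_hot[dset[key]] = 1
        y.append(one_hot)

X = np.array(X)   # shape: (len(dset) * maxshift, inp_len)
y = np.array(y)   # shape: (len(dset) * maxshift, resps_len)
```

Since the token sequence of a phrase does not change between shifts, the texts_to_sequences call is hoisted out of the inner loop in this sketch; the committed code calls it once per shift with the same result.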