preechanon committed on
Commit
78a3063
1 Parent(s): 8af9c20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -15
app.py CHANGED
@@ -28,17 +28,14 @@ class TimestepDropout(Dropout):
28
  noise_shape = (input_shape[0], input_shape[1], 1)
29
  return noise_shape
30
 
31
-
32
  def model_(n_gram = 21):
33
 
34
  input1 = Input(shape=(21,),dtype='float32',name = 'char_input')
35
  input2 = Input(shape=(21,),dtype='float32',name = 'type_input')
36
-
37
  a = Embedding(178, 32,input_length=21)(input1)
38
  a = SpatialDropout1D(0.15)(a)
39
  #a = TimestepDropout(0.05)(a)
40
  char_input = BatchNormalization()(a)
41
-
42
  a_concat = []
43
  filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[8,200],[11,150],[12,100]]
44
  #filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[7,200],[8,200],[9,150],[10,150],[11,150],[12,100]]
@@ -58,18 +55,13 @@ def model_(n_gram = 21):
58
  b = Embedding(12, 12, input_length=21)(input2)
59
  type_inputs = SpatialDropout1D(0.15)(b)
60
  #type_inputs = TimestepDropout(0.05)(b)
61
-
62
  x = Concatenate()([type_inputs, char_input, lstm_char, token_max])
63
  x = BatchNormalization()(x)
64
-
65
  x = Flatten()(x)
66
  x = Dense(100, activation='elu')(x)
67
  x = Dropout(0.2)(x)
68
  out = Dense(1, activation='sigmoid',dtype = 'float32',kernel_regularizer=regularizers.L2(0.01),bias_regularizer=regularizers.L2(0.01))(x)
69
-
70
-
71
  model = Model(inputs=[input1, input2], outputs=out)
72
-
73
  return model
74
 
75
 
@@ -91,23 +83,18 @@ def create_feature_array(text, n_pad=21):
91
  x_char = np.array(x_char).astype(float)
92
  x_type = np.array(x_type).astype(float)
93
  return x_char, x_type
94
-
95
  def tokenize(text):
 
96
  n_pad = 21
97
-
98
  if not text:
99
  return ['']
100
-
101
  if isinstance(text, str) and sys.version_info.major == 2:
102
  text = text.decode('utf-8')
103
-
104
  x_char, x_type = create_feature_array(text, n_pad=n_pad)
105
  word_end = []
106
-
107
  y_predict = model.predict([x_char, x_type], batch_size = 512)
108
  y_predict = (y_predict.ravel() > 0.46542968749999997).astype(int)
109
  word_end = y_predict[1:].tolist() + [1]
110
-
111
  tokens = []
112
  word = ''
113
  for char, w_e in zip(text, word_end):
@@ -117,7 +104,6 @@ def tokenize(text):
117
  word = ''
118
  return tokens
119
 
120
-
121
  model = model_()
122
  model.load_weights("cutto_tf2.h5")
123
 
 
28
  noise_shape = (input_shape[0], input_shape[1], 1)
29
  return noise_shape
30
 
 
31
  def model_(n_gram = 21):
32
 
33
  input1 = Input(shape=(21,),dtype='float32',name = 'char_input')
34
  input2 = Input(shape=(21,),dtype='float32',name = 'type_input')
 
35
  a = Embedding(178, 32,input_length=21)(input1)
36
  a = SpatialDropout1D(0.15)(a)
37
  #a = TimestepDropout(0.05)(a)
38
  char_input = BatchNormalization()(a)
 
39
  a_concat = []
40
  filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[8,200],[11,150],[12,100]]
41
  #filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[7,200],[8,200],[9,150],[10,150],[11,150],[12,100]]
 
55
  b = Embedding(12, 12, input_length=21)(input2)
56
  type_inputs = SpatialDropout1D(0.15)(b)
57
  #type_inputs = TimestepDropout(0.05)(b)
 
58
  x = Concatenate()([type_inputs, char_input, lstm_char, token_max])
59
  x = BatchNormalization()(x)
 
60
  x = Flatten()(x)
61
  x = Dense(100, activation='elu')(x)
62
  x = Dropout(0.2)(x)
63
  out = Dense(1, activation='sigmoid',dtype = 'float32',kernel_regularizer=regularizers.L2(0.01),bias_regularizer=regularizers.L2(0.01))(x)
 
 
64
  model = Model(inputs=[input1, input2], outputs=out)
 
65
  return model
66
 
67
 
 
83
  x_char = np.array(x_char).astype(float)
84
  x_type = np.array(x_type).astype(float)
85
  return x_char, x_type
 
86
  def tokenize(text):
87
+
88
  n_pad = 21
 
89
  if not text:
90
  return ['']
 
91
  if isinstance(text, str) and sys.version_info.major == 2:
92
  text = text.decode('utf-8')
 
93
  x_char, x_type = create_feature_array(text, n_pad=n_pad)
94
  word_end = []
 
95
  y_predict = model.predict([x_char, x_type], batch_size = 512)
96
  y_predict = (y_predict.ravel() > 0.46542968749999997).astype(int)
97
  word_end = y_predict[1:].tolist() + [1]
 
98
  tokens = []
99
  word = ''
100
  for char, w_e in zip(text, word_end):
 
104
  word = ''
105
  return tokens
106
 
 
107
  model = model_()
108
  model.load_weights("cutto_tf2.h5")
109