Yuchan committed
Update Inference.py

Inference.py  CHANGED  (+9 -13)
@@ -27,7 +27,7 @@ def text_to_ids(text):
 def ids_to_text(ids):
     return sp.decode(ids)

-max_len =
+max_len = 230
 batch_size = 128

 class Lo(layers.Layer):
@@ -118,7 +118,7 @@ class LoSoU(layers.Layer):
         # x: (B, L, d_model) maybe bfloat16 or float32
         # cast to float32 for all internal computations
         x_f32 = tf.cast(x, tf.float32)
-
+

         # Q, K, V
         q = self.Q(x_f32)  # (B, L, 96)
@@ -127,7 +127,7 @@ class LoSoU(layers.Layer):

         # gating signals in (0,1)
         g_q = tf.nn.sigmoid(q)
-        g_k = tf.nn.
+        g_k = tf.nn.tanh(k)

         # elementwise product -> bounded roughly [0,1]
         score = g_q * g_k
@@ -162,12 +162,11 @@ class LoSoU(layers.Layer):
         gated = tf.nn.silu(a) * b
         out = self.O(gated)

-        out = self.norm(out
+        out = self.norm(out)

         # cast back to original dtype for downstream layers
         return tf.cast(out, x.dtype)

-
 class Block(layers.Layer):
     def __init__(self, d_model, hyper_n):
         super().__init__()
@@ -181,23 +180,20 @@ class Block(layers.Layer):
 class ReLaM(tf.keras.Model):
     def __init__(self, vocab_size, max_seq_len, d_model, n_layers, dropout_rate=0.1):
         super().__init__()
-        self.token_embedding = layers.Embedding(vocab_size,
-        self.pos_embedding = layers.Embedding(max_seq_len,
-        self.blocks = [Block(d_model, hyper_n=
-
-        # keep LayerNormalization in float32 to avoid precision issues
+        self.token_embedding = layers.Embedding(vocab_size, 128)
+        self.pos_embedding = layers.Embedding(max_seq_len, 128)
+        self.blocks = [Block(d_model, hyper_n=1) for _ in range(n_layers)]
+        self.proj = layers.Dense(128)
         self.ln_f = layers.LayerNormalization(epsilon=1e-5, dtype="float32")

     def call(self, x, training=False):
         batch_size, seq_len = tf.shape(x)[0], tf.shape(x)[1]
         positions = tf.range(seq_len)[tf.newaxis, :]
-
         x = self.token_embedding(x) + self.pos_embedding(positions)
         for block in self.blocks:
             x = block(x)
-
+        x = self.proj(x)
         x = self.ln_f(x)
-
         embedding_matrix = tf.cast(self.token_embedding.embeddings, x.dtype)
         logits = tf.matmul(x, embedding_matrix, transpose_b=True)
         return tf.cast(logits, tf.float32)
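A note on the new gate: tf.nn.sigmoid(q) stays in (0, 1), while the newly introduced tf.nn.tanh(k) ranges over (-1, 1), so the elementwise score g_q * g_k can now be negative and is bounded roughly by (-1, 1) rather than [0, 1] as the surrounding comments still say. A standalone range check, purely illustrative and not part of the commit:

import tensorflow as tf

q = tf.random.normal([8])
k = tf.random.normal([8])
score = tf.nn.sigmoid(q) * tf.nn.tanh(k)   # the gate product used after this change
# sigmoid in (0, 1) times tanh in (-1, 1) keeps every entry strictly inside (-1, 1)
print(float(tf.reduce_min(score)), float(tf.reduce_max(score)))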
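The added self.proj = layers.Dense(128), applied as x = self.proj(x) just before the final LayerNormalization, appears to be what makes the tied output projection line up: the token embedding is now hard-coded to width 128, and logits = tf.matmul(x, embedding_matrix, transpose_b=True) only works when the final hidden width equals the embedding width, so a d_model-wide block output has to be mapped back to 128 first. A minimal shape sketch; vocab_size, d_model, and the batch and sequence sizes below are illustrative assumptions, not values from the file:

import tensorflow as tf
from tensorflow.keras import layers

vocab_size, d_model, emb_dim = 8000, 256, 128            # assumed sizes for illustration

token_embedding = layers.Embedding(vocab_size, emb_dim)
proj = layers.Dense(emb_dim)

_ = token_embedding(tf.zeros([1, 1], dtype=tf.int32))    # build the layer so .embeddings exists

x = tf.random.normal([2, 230, d_model])                  # stand-in for the block output
x = proj(x)                                              # (2, 230, 128): back to the embedding width
logits = tf.matmul(x, token_embedding.embeddings, transpose_b=True)
print(logits.shape)                                      # (2, 230, 8000)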
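For completeness, a hypothetical way to exercise the updated model after this commit. Only max_len = 230 and batch_size = 128 come from the file; vocab_size, d_model, and n_layers are placeholders, and the import assumes Inference.py exposes ReLaM at module level without side effects:

import tensorflow as tf
from Inference import ReLaM                 # assumption: the class is importable from this script

vocab_size = 8000                           # placeholder; the SentencePiece model (sp) defines the real size
d_model = 256                               # placeholder
n_layers = 4                                # placeholder
max_len = 230                               # from the file
batch_size = 128                            # from the file

model = ReLaM(vocab_size, max_len, d_model, n_layers)
dummy_ids = tf.zeros([batch_size, max_len], dtype=tf.int32)
logits = model(dummy_ids)                   # expected shape: (128, 230, vocab_size), float32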