Yuchan
committed on
Update Mo.py
Browse files
Mo.py
CHANGED
|
@@ -123,7 +123,7 @@ class SwiGLU(layers.Layer):
|
|
| 123 |
x_proj = self.proj(x)
|
| 124 |
x_val, x_gate = tf.split(x_proj, 2, axis=-1)
|
| 125 |
return self.out(x_val * tf.nn.silu(x_gate))
|
| 126 |
-
|
| 127 |
class LoUScan(layers.Layer):
|
| 128 |
def __init__(self, d_model, clip_value=5.0, eps=1e-6):
|
| 129 |
super().__init__()
|
|
@@ -137,7 +137,7 @@ class LoUScan(layers.Layer):
|
|
| 137 |
|
| 138 |
self.norm = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
|
| 139 |
self.norm1 = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
|
| 140 |
-
self.glu = SwiGLU(d_model,
|
| 141 |
|
| 142 |
def call(self, x):
|
| 143 |
x_f32 = tf.cast(x, tf.float32)
|
|
@@ -150,27 +150,32 @@ class LoUScan(layers.Layer):
|
|
| 150 |
|
| 151 |
g_q = (tf.nn.tanh(q) + 1.0) / 2.0
|
| 152 |
g_k = (tf.nn.tanh(k) + 1.0) / 2.0
|
| 153 |
-
score = g_q * g_k # gating
|
| 154 |
-
|
| 155 |
-
#
|
| 156 |
-
def
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
outputs = tf.clip_by_value(outputs, -self.clip_value, self.clip_value)
|
| 170 |
out = self.norm(outputs + residual)
|
| 171 |
out = self.glu(out)
|
| 172 |
return tf.cast(out, x.dtype)
|
| 173 |
|
|
|
|
| 174 |
class Lo(layers.Layer):
|
| 175 |
def __init__(self, d_model):
|
| 176 |
super().__init__()
|
|
@@ -240,7 +245,7 @@ def masked_perplexity(y_true, y_pred, eps=0.1):
|
|
| 240 |
# ๋ชจ๋ธ ์์ฑ & ์ปดํ์ผ
|
| 241 |
# =======================
|
| 242 |
with strategy.scope():
|
| 243 |
-
model = CumaLM(vocab_size=vocab_size, max_seq_len=max_len,
|
| 244 |
dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
|
| 245 |
_ = model(dummy_input, training=False)
|
| 246 |
model.summary()
|
|
|
|
| 123 |
x_proj = self.proj(x)
|
| 124 |
x_val, x_gate = tf.split(x_proj, 2, axis=-1)
|
| 125 |
return self.out(x_val * tf.nn.silu(x_gate))
|
| 126 |
+
|
| 127 |
class LoUScan(layers.Layer):
|
| 128 |
def __init__(self, d_model, clip_value=5.0, eps=1e-6):
|
| 129 |
super().__init__()
|
|
|
|
| 137 |
|
| 138 |
self.norm = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
|
| 139 |
self.norm1 = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
|
| 140 |
+
self.glu = SwiGLU(d_model, 3500) # ์ฌ์ฉ์ ์ ์ GLU
|
| 141 |
|
| 142 |
def call(self, x):
|
| 143 |
x_f32 = tf.cast(x, tf.float32)
|
|
|
|
| 150 |
|
| 151 |
g_q = (tf.nn.tanh(q) + 1.0) / 2.0
|
| 152 |
g_k = (tf.nn.tanh(k) + 1.0) / 2.0
|
| 153 |
+
score = g_q * g_k # element-wise gating
|
| 154 |
+
|
| 155 |
+
# Apply a sequential, causal scan to each sequence in the batch.
def process_sequence(inputs):
    """Causally-normalized, score-weighted running average of one sequence.

    inputs: tuple (score_seq, v_seq), both time-major (seq_len, d) float32.
    Returns a (seq_len, d) tensor whose step t is
        sum_{i<=t}(s_i * v_i) / max(sum_{i<=t}(s_i), eps)
    so no future position can influence the output at step t.
    """
    score_seq, v_seq = inputs
    # Carry BOTH running sums so the denominator is causal.
    # The original version divided every step by the full-sequence score
    # sum (future leakage, contradicting the "causal" comment) and had
    # `step` return a (carry, out) pair — but tf.scan requires fn to
    # return exactly the carry structure of `initializer`, so the old
    # code also violated tf.scan's contract.
    init = (tf.zeros_like(v_seq[0]), tf.zeros_like(score_seq[0]))

    def step(carry, elems):
        num, den = carry
        s_t, v_t = elems
        # Accumulate score-weighted values and raw score mass up to t.
        return num + s_t * v_t, den + s_t

    # tf.scan stacks the carry at every step: nums/dens are (seq_len, d).
    nums, dens = tf.scan(step, (score_seq, v_seq), initializer=init)
    # eps floor guards against division by ~0 early in the sequence.
    return nums / tf.maximum(dens, self.eps)
|
| 169 |
+
|
| 170 |
+
# ๋ฐฐ์น ์ฐจ์ ์ฒ๋ฆฌ
|
| 171 |
+
outputs = tf.map_fn(lambda inp: process_sequence(inp), (score, v), dtype=tf.float32)
|
| 172 |
+
|
| 173 |
outputs = tf.clip_by_value(outputs, -self.clip_value, self.clip_value)
|
| 174 |
out = self.norm(outputs + residual)
|
| 175 |
out = self.glu(out)
|
| 176 |
return tf.cast(out, x.dtype)
|
| 177 |
|
| 178 |
+
|
| 179 |
class Lo(layers.Layer):
|
| 180 |
def __init__(self, d_model):
|
| 181 |
super().__init__()
|
|
|
|
| 245 |
# ๋ชจ๋ธ ์์ฑ & ์ปดํ์ผ
|
| 246 |
# =======================
|
| 247 |
with strategy.scope():
|
| 248 |
+
model = CumaLM(vocab_size=vocab_size, max_seq_len=max_len, d_model=256, n_layers=1)
|
| 249 |
dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
|
| 250 |
_ = model(dummy_input, training=False)
|
| 251 |
model.summary()
|