benchang1110 committed on
Commit
7ca2faf
·
verified ·
1 Parent(s): 3432a0c

Delete instruction/gen_instruction.py

Browse files
Files changed (1) hide show
  1. instruction/gen_instruction.py +0 -612
instruction/gen_instruction.py DELETED
@@ -1,612 +0,0 @@
1
- import argparse
2
-
3
# ---------------------------------------------------------------------------
# Instruction field encodings
# ---------------------------------------------------------------------------

# operation groups: one-hot masks, one bit per operation (14 bits in total)
NOP = 1 << 0
TEST_BW = 1 << 1
LOAD = 1 << 2
STORE = 1 << 3
MLP_WM = 1 << 4
MLP_QKT = 1 << 5
QKT_M_RSQRT = 1 << 6
MLP_HP = 1 << 7
GATE = 1 << 8
RRMS = 1 << 9
RMSNORM = 1 << 10
SOFTMAX = 1 << 11
RESIDUAL = 1 << 12
ROPE = 1 << 13

# cmode groups (1-bit field)
MX_INT8 = 0b0
MX_INT4 = 0b1

# stage groups (1-bit field)
SUM = 0b0
GEN = 0b1

# nonlinear groups (1-bit field)
NO_ACT = 0b0
SILU = 0b1

# load targets groups (3-bit field, plain enumeration rather than one-hot)
NULL = 0b000
ACT = 0b001
ACT_S = 0b010
RESI = 0b011
RESI_S = 0b100
WEIGHT_S = 0b101

# write back groups: one-hot masks (8 bits in total)
NO_WB = 1 << 0
WB = 1 << 1
WB_KV = 1 << 2
WB_KV_S = 1 << 3
WB_ACT = 1 << 4
WB_ACT_S = 1 << 5
WB_RESI = 1 << 6
WB_RESI_S = 1 << 7

# ---------------------------------------------------------------------------
# address space
# ---------------------------------------------------------------------------
# Each region has a base address plus a per-layer and/or per-token stride.
# NOTE(review): names ending in "S" (WQS, KCS, ACTS, ...) are presumably the
# scale regions paired with the element regions of the same name - confirm
# against the memory-map specification.
WQ_BASE_ADDR, WQ_LAYER_OFFSET = 0x00000000, 0x00100000
WK_BASE_ADDR, WK_LAYER_OFFSET = 0x02000000, 0x00100000
WV_BASE_ADDR, WV_LAYER_OFFSET = 0x04000000, 0x00100000
WO_BASE_ADDR, WO_LAYER_OFFSET = 0x06000000, 0x00100000
W1_BASE_ADDR, W1_LAYER_OFFSET = 0x08000000, 0x002B0000
W3_BASE_ADDR, W3_LAYER_OFFSET = 0x0D600000, 0x002B0000
W2_BASE_ADDR, W2_LAYER_OFFSET = 0x12C00000, 0x002B0000
WQS_BASE_ADDR, WQS_LAYER_OFFSET = 0x18200000, 0x00008000
WKS_BASE_ADDR, WKS_LAYER_OFFSET = 0x18300000, 0x00008000
WVS_BASE_ADDR, WVS_LAYER_OFFSET = 0x18400000, 0x00008000
WOS_BASE_ADDR, WOS_LAYER_OFFSET = 0x18500000, 0x00008000
W1S_BASE_ADDR, W1S_LAYER_OFFSET = 0x18600000, 0x00015800
W3S_BASE_ADDR, W3S_LAYER_OFFSET = 0x188B0000, 0x00015800
W2S_BASE_ADDR, W2S_LAYER_OFFSET = 0x18B60000, 0x00018000
KC_BASE_ADDR, KC_LAYER_OFFSET, KC_TOKEN_OFFSET = 0x18E60000, 0x00080000, 0x100
VC_BASE_ADDR, VC_LAYER_OFFSET, VC_TOKEN_OFFSET = 0x19E60000, 0x00080000, 0x100
KCS_BASE_ADDR, KCS_LAYER_OFFSET, KCS_TOKEN_OFFSET = 0x1AE60000, 0x00040000, 0x80
VCS_BASE_ADDR, VCS_LAYER_OFFSET, VCS_TOKEN_OFFSET = 0x1B660000, 0x00040000, 0x80
ACT_BASE_ADDR, ACT_TOKEN_OFFSET = 0x1BE60000, 0x1000
RESI_BASE_ADDR, RESI_TOKEN_OFFSET = 0x1C660000, 0x1000
ACTS_BASE_ADDR, ACTS_TOKEN_OFFSET = 0x1CE60000, 0x80
RESIS_BASE_ADDR, RESIS_TOKEN_OFFSET = 0x1CEA0000, 0x80
PRENORM_ADDR, PRENORM_LAYER_OFFSET = 0x1CEE0000, 0x1000
POSTNORM_ADDR, POSTNORM_LAYER_OFFSET = 0x1CF00000, 0x1000
ROPE_BASE_ADDR, ROPE_TOKEN_OFFSET = 0x1CF20000, 0xC0
PRENORMS_ADDR, PRENORMS_LAYER_OFFSET = 0x1CFE0000, 0x80
POSTNORMS_ADDR, POSTNORMS_LAYER_OFFSET = 0x1CFE1000, 0x80
OUTNORM_ADDR = 0x1D840000
OUTNORMS_ADDR = 0x1D841000
WHEAD_BASE_ADDR, WHEAD_LAYER_OFFSET = 0x1D000000, 0x100000
WHEADS_BASE_ADDR, WHEADS_LAYER_OFFSET = 0x1D800000, 0x8000
HEAD_OUT_BASE_ADDR, HEAD_OUT_LAYER_OFFSET = 0x1E000000, 0x1000
HEADS_OUT_BASE_ADDR, HEADS_OUT_LAYER_OFFSET = 0x1E008000, 0x80
78
-
79
class Instruction:
    """One instruction record, held as plain integer fields.

    ``to_binary`` serialises the fields, most significant first, in the order
    and bit widths listed in ``_FIELD_WIDTHS`` (264 bits in total) and
    left-pads the result with zeros to a 512-bit word.

    The attributes are ordinary, mutable instance attributes; callers may
    overwrite them (for example addresses or the token index) before each
    call to ``to_binary``.
    """

    # (attribute name, width in bits), most significant field first.
    _FIELD_WIDTHS = (
        ("op", 14),
        ("dq_en", 1),
        ("stage", 1),
        ("token", 11),
        ("load_target", 3),
        ("cmode", 1),
        ("nonlinear", 1),
        ("write_back", 8),
        ("input_dim", 16),
        ("output_dim", 16),
        ("input_addr", 32),
        ("scale_addr", 32),
        ("output_addr", 32),
        ("layer_offset", 32),
        ("token_offset", 32),
        ("num_cb_ws", 16),
        ("num_cb_wm", 16),
    )
    # Length of one encoded instruction word, in bits.
    _WORD_BITS = 512

    def __init__(self,
                 op,
                 dq_en,
                 stage,
                 token,
                 load_target,
                 cmode,
                 nonlinear,
                 write_back,
                 input_dim,
                 output_dim,
                 input_addr,
                 scale_addr,
                 output_addr,
                 layer_offset,
                 token_offset,
                 num_cb_ws,
                 num_cb_wm
                 ):
        """Store every field unchanged; range checks happen in ``to_binary``."""
        self.op = op
        self.dq_en = dq_en
        self.stage = stage
        self.token = token
        self.load_target = load_target
        self.cmode = cmode
        self.nonlinear = nonlinear
        self.write_back = write_back
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.input_addr = input_addr
        self.scale_addr = scale_addr
        self.output_addr = output_addr
        self.layer_offset = layer_offset
        self.token_offset = token_offset
        self.num_cb_ws = num_cb_ws
        self.num_cb_wm = num_cb_wm

    def to_binary(self, inst_num: int, inst_info: str) -> str:
        """Return the instruction as a 512-character string of '0'/'1'.

        Args:
            inst_num: Running instruction index; only used in the log line.
            inst_info: Short human-readable label; only used in the log line.

        Returns:
            The concatenated fixed-width fields, left-padded with '0' to
            512 characters.

        Raises:
            ValueError: If any field is negative or too large for its bit
                width. Zero-padded formatting only guarantees a *minimum*
                width, so without this check an oversized value would shift
                every following field and corrupt the whole word.
        """
        print('INFO: {:30s} , Instruction id: {}'.format(inst_info, inst_num))
        encoded_fields = []
        for name, width in self._FIELD_WIDTHS:
            value = getattr(self, name)
            if not 0 <= value < (1 << width):
                raise ValueError(
                    f"field '{name}' = {value} does not fit in {width} unsigned bits "
                    f"(instruction {inst_num}: {inst_info})"
                )
            encoded_fields.append(f"{value:0{width}b}")
        # zfill is a plain left zero-pad here: the string holds only 0/1 digits.
        return "".join(encoded_fields).zfill(self._WORD_BITS)
130
-
131
def gen_inst(op, dq_en, stage, token, load_target, cmode, nonlinear, write_back, input_dim, output_dim, input_addr, scale_addr, output_addr, layer_offset, token_offset, num_cb_ws, num_cb_wm):
    """Build and return an ``Instruction`` from its seventeen field values.

    Thin factory wrapper: every argument is forwarded unchanged to the
    ``Instruction`` constructor parameter of the same name.
    """
    fields = dict(
        op=op,
        dq_en=dq_en,
        stage=stage,
        token=token,
        load_target=load_target,
        cmode=cmode,
        nonlinear=nonlinear,
        write_back=write_back,
        input_dim=input_dim,
        output_dim=output_dim,
        input_addr=input_addr,
        scale_addr=scale_addr,
        output_addr=output_addr,
        layer_offset=layer_offset,
        token_offset=token_offset,
        num_cb_ws=num_cb_ws,
        num_cb_wm=num_cb_wm,
    )
    return Instruction(**fields)
133
# =============================================================================
# Instruction templates
# =============================================================================
# Positional argument order of gen_inst():
#   op, dq_en, stage, token, load_target, cmode, nonlinear, write_back,
#   input_dim, output_dim, input_addr, scale_addr, output_addr,
#   layer_offset, token_offset, num_cb_ws, num_cb_wm
#
# These are shared, mutable objects: the generation loop overwrites their
# address / token / dimension attributes before encoding each instruction.
#
# inst_test_bw is defined once (GEN stage) in the MXINT8 section. A SUM-stage
# definition that used to precede it was overwritten before any use, so only
# the effective binding is kept.
# =============================================================================
# MXINT8 Instruction templates
# =============================================================================
inst_nop = gen_inst(NOP, 0, SUM, 0, NULL, MX_INT8, NO_ACT, NO_WB, 0, 0, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0, 0)
inst_test_bw = gen_inst(TEST_BW, 0, GEN, 0, NULL, MX_INT8, NO_ACT, NO_WB, 0, 0, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0, 0)
inst_load_resi = gen_inst(LOAD, 0, GEN, 0, RESI, MX_INT8, NO_ACT, NO_WB, 4096, 0, RESI_BASE_ADDR, 0x00000000, 0x00000000, 0x00000000, RESI_TOKEN_OFFSET, 0, 0)
inst_load_in_act = gen_inst(LOAD, 0, GEN, 0, ACT, MX_INT8, NO_ACT, NO_WB, 4096, 0, ACT_BASE_ADDR, 0x00000000, 0x00000000, 0x00000000, ACT_TOKEN_OFFSET, 0, 0)
inst_load_resi_s = gen_inst(LOAD, 0, GEN, 0, RESI_S, MX_INT8, NO_ACT, NO_WB, 128, 0, RESIS_BASE_ADDR, 0x00000000, 0x00000000, 0x00000000, RESIS_TOKEN_OFFSET, 0, 0)
inst_load_in_act_s = gen_inst(LOAD, 0, GEN, 0, ACT_S, MX_INT8, NO_ACT, NO_WB, 128, 0, ACTS_BASE_ADDR, 0x00000000, 0x00000000, 0x00000000, ACTS_TOKEN_OFFSET, 0, 0)
inst_mlp_wm_q = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT8, NO_ACT, NO_WB, 4096, 4096, WQ_BASE_ADDR, WQS_BASE_ADDR, 0x00000000, WQ_LAYER_OFFSET, 0x00000000, 128, 4096)
inst_mlp_wm_k = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT8, NO_ACT, NO_WB, 4096, 4096, WK_BASE_ADDR, WKS_BASE_ADDR, 0x00000000, WK_LAYER_OFFSET, 0x00000000, 128, 4096)
inst_mlp_wm_v = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT8, NO_ACT, WB, 4096, 4096, WV_BASE_ADDR, WVS_BASE_ADDR, 0x00000000, WV_LAYER_OFFSET, 0x00000000, 128, 4096)
inst_mlp_wm_o = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT8, NO_ACT, NO_WB, 4096, 4096, WO_BASE_ADDR, WOS_BASE_ADDR, 0x00000000, WO_LAYER_OFFSET, 0x00000000, 128, 4096)
inst_mlp_wm_w1 = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT8, SILU, NO_WB, 4096, 11008, W1_BASE_ADDR, W1S_BASE_ADDR, 0x00000000, W1_LAYER_OFFSET, 0x00000000, 32, 1024)
inst_mlp_wm_w3 = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT8, NO_ACT, NO_WB, 4096, 11008, W3_BASE_ADDR, W3S_BASE_ADDR, 0x00000000, W3_LAYER_OFFSET, 0x00000000, 32, 1024)
inst_mlp_wm_w2 = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT8, NO_ACT, NO_WB, 11008, 4096, W2_BASE_ADDR, W2S_BASE_ADDR, 0x00000000, W2_LAYER_OFFSET, 0x00000000, 24, 688)
inst_gate = gen_inst(GATE, 1, GEN, 0, NULL, MX_INT8, NO_ACT, NO_WB, 11008, 11008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0, 0)
inst_residual = gen_inst(RESIDUAL, 1, GEN, 0, NULL, MX_INT8, NO_ACT, WB, 4096, 4096, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0, 0)
inst_store_act = gen_inst(STORE, 1, GEN, 0, NULL, MX_INT8, NO_ACT, WB_ACT, 4096, 4096, 0x00000000, 0x00000000, ACT_BASE_ADDR, 0x00000000, ACT_TOKEN_OFFSET, 0, 0)
inst_store_act_s = gen_inst(STORE, 1, GEN, 0, NULL, MX_INT8, NO_ACT, WB_ACT_S, 4096, 4096, 0x00000000, 0x00000000, ACTS_BASE_ADDR, 0x00000000, ACTS_TOKEN_OFFSET, 0, 0)
# NOTE(review): the two RESI stores below use WB_ACT / WB_ACT_S although
# WB_RESI / WB_RESI_S exist and are otherwise unused here - confirm against
# the hardware specification whether this is intentional.
inst_store_resi = gen_inst(STORE, 1, GEN, 0, NULL, MX_INT8, NO_ACT, WB_ACT, 4096, 4096, 0x00000000, 0x00000000, RESI_BASE_ADDR, 0x00000000, RESI_TOKEN_OFFSET, 0, 0)
inst_store_resi_s = gen_inst(STORE, 1, GEN, 0, NULL, MX_INT8, NO_ACT, WB_ACT_S, 4096, 4096, 0x00000000, 0x00000000, RESIS_BASE_ADDR, 0x00000000, RESIS_TOKEN_OFFSET, 0, 0)
inst_rope_nwb = gen_inst(ROPE, 1, GEN, 0, NULL, MX_INT8, NO_ACT, NO_WB, 192, 4096, ROPE_BASE_ADDR, 0x00000000, 0x00000000, 0x00000000, ROPE_TOKEN_OFFSET, 0, 0)
inst_rope_wb = gen_inst(ROPE, 1, GEN, 0, NULL, MX_INT8, NO_ACT, WB, 192, 4096, ROPE_BASE_ADDR, 0x00000000, 0x00000000, 0x00000000, ROPE_TOKEN_OFFSET, 0, 0)
inst_store_k = gen_inst(STORE, 1, GEN, 0, NULL, MX_INT8, NO_ACT, WB_KV, 4096, 4096, 0x00000000, 0x00000000, KC_BASE_ADDR, 0x00000000, KC_TOKEN_OFFSET, 0, 0)
inst_store_k_s = gen_inst(STORE, 1, GEN, 0, NULL, MX_INT8, NO_ACT, WB_KV_S, 4096, 4096, 0x00000000, 0x00000000, KCS_BASE_ADDR, 0x00000000, KCS_TOKEN_OFFSET, 0, 0)
inst_store_v = gen_inst(STORE, 1, GEN, 0, NULL, MX_INT8, NO_ACT, WB_KV, 4096, 4096, 0x00000000, 0x00000000, VC_BASE_ADDR, 0x00000000, VC_TOKEN_OFFSET, 0, 0)
inst_store_v_s = gen_inst(STORE, 1, GEN, 0, NULL, MX_INT8, NO_ACT, WB_KV_S, 4096, 4096, 0x00000000, 0x00000000, VCS_BASE_ADDR, 0x00000000, VCS_TOKEN_OFFSET, 0, 0)
inst_mlp_qkt = gen_inst(MLP_QKT, 1, GEN, 0, NULL, MX_INT8, NO_ACT, NO_WB, 4, 1, KC_BASE_ADDR, KCS_BASE_ADDR, 0x00000000, 0x00000000, KCS_TOKEN_OFFSET, 4, 32)
inst_qkt_m_rsqrt = gen_inst(QKT_M_RSQRT, 1, GEN, 0, NULL, MX_INT8, NO_ACT, NO_WB, 1, 1, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 1, 512)
inst_softmax = gen_inst(SOFTMAX, 1, GEN, 0, NULL, MX_INT8, NO_ACT, NO_WB, 0, 1, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 1, 0)
inst_mlp_hp = gen_inst(MLP_HP, 1, GEN, 0, NULL, MX_INT8, NO_ACT, NO_WB, 4, 4096, VC_BASE_ADDR, VCS_BASE_ADDR, 0x00000000, 0x00000000, VCS_TOKEN_OFFSET, 4, 32)
inst_prerrms = gen_inst(RRMS, 1, GEN, 0, NULL, MX_INT8, NO_ACT, NO_WB, 4096, 1, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0, 128)
inst_load_pre_wm = gen_inst(LOAD, 0, GEN, 0, ACT, MX_INT8, NO_ACT, NO_WB, 4096, 0, PRENORM_ADDR, 0x00000000, 0x00000000, PRENORM_LAYER_OFFSET, 0x00000000, 0, 0)
inst_load_pre_ws = gen_inst(LOAD, 0, GEN, 0, ACT_S, MX_INT8, NO_ACT, NO_WB, 128, 0, PRENORMS_ADDR, 0x00000000, 0x00000000, PRENORMS_LAYER_OFFSET, 0x00000000, 0, 0)
inst_prermsnorm = gen_inst(RMSNORM, 1, GEN, 0, NULL, MX_INT8, NO_ACT, NO_WB, 4096, 4096, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0, 128)
inst_postrrms = gen_inst(RRMS, 1, GEN, 0, NULL, MX_INT8, NO_ACT, NO_WB, 4096, 1, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0, 128)
inst_load_post_wm = gen_inst(LOAD, 0, GEN, 0, ACT, MX_INT8, NO_ACT, NO_WB, 4096, 0, POSTNORM_ADDR, 0x00000000, 0x00000000, POSTNORM_LAYER_OFFSET, 0x00000000, 0, 0)
inst_load_post_ws = gen_inst(LOAD, 0, GEN, 0, ACT_S, MX_INT8, NO_ACT, NO_WB, 128, 0, POSTNORMS_ADDR, 0x00000000, 0x00000000, POSTNORMS_LAYER_OFFSET, 0x00000000, 0, 0)
inst_postrmsnorm = gen_inst(RMSNORM, 1, GEN, 0, NULL, MX_INT8, NO_ACT, NO_WB, 4096, 4096, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0, 128)
# =============================================================================
# DECODER OUT Instruction templates
# =============================================================================
inst_outrrms = gen_inst(RRMS, 1, GEN, 0, NULL, MX_INT8, NO_ACT, NO_WB, 4096, 1, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0, 128)
inst_load_out_wm = gen_inst(LOAD, 0, GEN, 0, ACT, MX_INT8, NO_ACT, NO_WB, 4096, 0, OUTNORM_ADDR, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0, 0)
inst_load_out_ws = gen_inst(LOAD, 0, GEN, 0, ACT_S, MX_INT8, NO_ACT, NO_WB, 128, 0, OUTNORMS_ADDR, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0, 0)
inst_outrmsnorm = gen_inst(RMSNORM, 1, GEN, 0, NULL, MX_INT8, NO_ACT, NO_WB, 4096, 4096, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0, 128)
inst_mlp_wm_whead = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT8, NO_ACT, WB, 4096, 4096, WHEAD_BASE_ADDR, WHEADS_BASE_ADDR, 0x00000000, WHEAD_LAYER_OFFSET, 0x00000000, 128, 4096)
inst_store_head = gen_inst(STORE, 1, GEN, 0, NULL, MX_INT8, NO_ACT, WB_ACT, 4096, 4096, 0x00000000, 0x00000000, HEAD_OUT_BASE_ADDR, 0x00000000, HEAD_OUT_LAYER_OFFSET, 0, 0)
inst_store_head_s = gen_inst(STORE, 1, GEN, 0, NULL, MX_INT8, NO_ACT, WB_ACT_S, 4096, 4096, 0x00000000, 0x00000000, HEADS_OUT_BASE_ADDR, 0x00000000, HEADS_OUT_LAYER_OFFSET, 0, 0)
# =============================================================================
# MXINT4 Instruction templates
# =============================================================================
# Same as the MXINT8 MLP_WM templates except cmode is MX_INT4 and num_cb_wm
# is halved.
inst_mlp_wm_whead_mxint4 = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT4, NO_ACT, WB, 4096, 4096, WHEAD_BASE_ADDR, WHEADS_BASE_ADDR, 0x00000000, WHEAD_LAYER_OFFSET, 0x00000000, 128, 2048)
inst_mlp_wm_q_mxint4 = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT4, NO_ACT, NO_WB, 4096, 4096, WQ_BASE_ADDR, WQS_BASE_ADDR, 0x00000000, WQ_LAYER_OFFSET, 0x00000000, 128, 2048)
inst_mlp_wm_k_mxint4 = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT4, NO_ACT, NO_WB, 4096, 4096, WK_BASE_ADDR, WKS_BASE_ADDR, 0x00000000, WK_LAYER_OFFSET, 0x00000000, 128, 2048)
inst_mlp_wm_v_mxint4 = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT4, NO_ACT, WB, 4096, 4096, WV_BASE_ADDR, WVS_BASE_ADDR, 0x00000000, WV_LAYER_OFFSET, 0x00000000, 128, 2048)
inst_mlp_wm_o_mxint4 = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT4, NO_ACT, NO_WB, 4096, 4096, WO_BASE_ADDR, WOS_BASE_ADDR, 0x00000000, WO_LAYER_OFFSET, 0x00000000, 128, 2048)
inst_mlp_wm_w1_mxint4 = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT4, SILU, NO_WB, 4096, 11008, W1_BASE_ADDR, W1S_BASE_ADDR, 0x00000000, W1_LAYER_OFFSET, 0x00000000, 32, 512)
inst_mlp_wm_w3_mxint4 = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT4, NO_ACT, NO_WB, 4096, 11008, W3_BASE_ADDR, W3S_BASE_ADDR, 0x00000000, W3_LAYER_OFFSET, 0x00000000, 32, 512)
inst_mlp_wm_w2_mxint4 = gen_inst(MLP_WM, 1, GEN, 0, WEIGHT_S, MX_INT4, NO_ACT, NO_WB, 11008, 4096, W2_BASE_ADDR, W2S_BASE_ADDR, 0x00000000, W2_LAYER_OFFSET, 0x00000000, 24, 344)
# =============================================================================
# Instruction templates end
# =============================================================================
201
# Parameters for instruction generation.
# Fixed settings first; they do not depend on the command line.
TEST_OP_GROUP = 'demo'
SIM_NUM_TOKEN = 1024
SIM_NUM_LAYER = 32

# Command-line options. Note that parsing runs at module level, i.e. whenever
# this file is executed or imported.
parser = argparse.ArgumentParser(description='Generate instruction binary file for simulation')
parser.add_argument(
    '--sim_cmode',
    type=str,
    default='mxint8',
    choices=['mxint8', 'mxint4'],
    help='Simulation compute mode',
)
parser.add_argument(
    '--sim_llm_head',
    action='store_true',
    help='if true, generate LLM head instructions for FPGA simulation',
)
args = parser.parse_args()

SIM_CMODE = args.sim_cmode
SIM_LLM_HEAD = args.sim_llm_head
# File-name infix: empty when the LLM head is enabled, '_no' otherwise.
if SIM_LLM_HEAD:
    SIM_LOGIT_FLAG = ''
else:
    SIM_LOGIT_FLAG = '_no'
213
-
214
- if __name__ == "__main__":
215
- if TEST_OP_GROUP == 'demo':
216
- current_token = 0
217
- tmp_output_dim = 1
218
- current_inst_cnt = 0
219
- file_name = "instruction_{}T_32L{}_write_back_logit_everyT_{}.bin".format(SIM_NUM_TOKEN, SIM_LOGIT_FLAG, SIM_CMODE)
220
-
221
- with open(file_name, "wb") as f:
222
- for tk in range(SIM_NUM_TOKEN):
223
- print("Gen {} th token instruction start".format(tk+1))
224
- # LOAD IN_ACT
225
- inst_load_in_act.input_addr = ACT_BASE_ADDR + ACT_TOKEN_OFFSET * tk
226
- inst_load_in_act.token = current_token
227
- binary_instruction = inst_load_in_act.to_binary(current_inst_cnt, 'LOAD IN_ACT')
228
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
229
- current_inst_cnt += 1
230
- # LOAD IN_ACT_S
231
- inst_load_in_act_s.input_addr = ACTS_BASE_ADDR + ACTS_TOKEN_OFFSET * tk
232
- inst_load_in_act_s.token = current_token
233
- binary_instruction = inst_load_in_act_s.to_binary(current_inst_cnt, 'LOAD IN_ACT_S')
234
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
235
- current_inst_cnt += 1
236
-
237
- for l in range(SIM_NUM_LAYER):
238
- print("Gen {} th token, {} th layer instruction".format(tk+1, l+1))
239
- # RRMS
240
- inst_prerrms.token = current_token
241
- binary_instruction = inst_prerrms.to_binary(current_inst_cnt, 'PRE RRMS')
242
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
243
- current_inst_cnt += 1
244
- # LOAD NORM weight
245
- inst_load_pre_wm.input_addr = PRENORM_ADDR + PRENORM_LAYER_OFFSET * l
246
- inst_load_pre_wm.token = current_token
247
- binary_instruction = inst_load_pre_wm.to_binary(current_inst_cnt, 'LOAD PRENORM weight')
248
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
249
- current_inst_cnt += 1
250
- # LOAD NORM weight scale
251
- inst_load_pre_ws.input_addr = PRENORMS_ADDR + PRENORMS_LAYER_OFFSET * l
252
- inst_load_pre_ws.token = current_token
253
- binary_instruction = inst_load_pre_ws.to_binary(current_inst_cnt, 'LOAD PRENORM weight scale')
254
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
255
- current_inst_cnt += 1
256
- # RMSNORM
257
- inst_prermsnorm.token = current_token
258
- binary_instruction = inst_prermsnorm.to_binary(current_inst_cnt, 'PRE RMSNORM')
259
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
260
- current_inst_cnt += 1
261
-
262
- if SIM_CMODE == 'mxint8':
263
- # MLP_WM Wv
264
- inst_mlp_wm_v.input_addr = WV_BASE_ADDR + WV_LAYER_OFFSET * l
265
- inst_mlp_wm_v.scale_addr = WVS_BASE_ADDR + WVS_LAYER_OFFSET * l
266
- inst_mlp_wm_v.token = current_token
267
- binary_instruction = inst_mlp_wm_v.to_binary(current_inst_cnt, 'MLP_WM Wv')
268
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
269
- current_inst_cnt += 1
270
- else:
271
- # MLP_WM Wv
272
- inst_mlp_wm_v_mxint4.input_addr = WV_BASE_ADDR + (WV_LAYER_OFFSET//2) * l
273
- inst_mlp_wm_v_mxint4.scale_addr = WVS_BASE_ADDR + WVS_LAYER_OFFSET * l
274
- inst_mlp_wm_v_mxint4.token = current_token
275
- binary_instruction = inst_mlp_wm_v_mxint4.to_binary(current_inst_cnt, 'MLP_WM Wv')
276
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
277
- current_inst_cnt += 1
278
-
279
- # STORE V elem
280
- inst_store_v.output_addr = VC_BASE_ADDR + VC_LAYER_OFFSET * l + VC_TOKEN_OFFSET * tk
281
- inst_store_v.token = current_token
282
- binary_instruction = inst_store_v.to_binary(current_inst_cnt, 'STORE V elem')
283
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
284
- current_inst_cnt += 1
285
- # STORE V scale
286
- inst_store_v_s.output_addr = VCS_BASE_ADDR + VCS_LAYER_OFFSET * l + VCS_TOKEN_OFFSET * tk
287
- inst_store_v_s.token = current_token
288
- binary_instruction = inst_store_v_s.to_binary(current_inst_cnt, 'STORE V scale')
289
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
290
- current_inst_cnt += 1
291
-
292
- if SIM_CMODE == 'mxint8':
293
- # MLP_WM Wk
294
- inst_mlp_wm_k.input_addr = WK_BASE_ADDR + WK_LAYER_OFFSET * l
295
- inst_mlp_wm_k.scale_addr = WKS_BASE_ADDR + WKS_LAYER_OFFSET * l
296
- inst_mlp_wm_k.token = current_token
297
- binary_instruction = inst_mlp_wm_k.to_binary(current_inst_cnt, 'MLP_WM Wk')
298
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
299
- current_inst_cnt += 1
300
- else:
301
- # MLP_WM Wk
302
- inst_mlp_wm_k_mxint4.input_addr = WK_BASE_ADDR + (WK_LAYER_OFFSET//2) * l
303
- inst_mlp_wm_k_mxint4.scale_addr = WKS_BASE_ADDR + WKS_LAYER_OFFSET * l
304
- inst_mlp_wm_k_mxint4.token = current_token
305
- binary_instruction = inst_mlp_wm_k_mxint4.to_binary(current_inst_cnt, 'MLP_WM Wk')
306
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
307
- current_inst_cnt += 1
308
-
309
- # ROPE WB (K)
310
- inst_rope_wb.input_addr = ROPE_BASE_ADDR + ROPE_TOKEN_OFFSET * tk
311
- inst_rope_wb.token = current_token
312
- binary_instruction = inst_rope_wb.to_binary(current_inst_cnt, 'ROPE WB (K)')
313
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
314
- current_inst_cnt += 1
315
- # STORE K elem
316
- inst_store_k.output_addr = KC_BASE_ADDR + KC_LAYER_OFFSET * l + KC_TOKEN_OFFSET * tk
317
- inst_store_k.token = current_token
318
- binary_instruction = inst_store_k.to_binary(current_inst_cnt, 'STORE K elem')
319
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
320
- current_inst_cnt += 1
321
- # STORE K scale
322
- inst_store_k_s.output_addr = KCS_BASE_ADDR + KCS_LAYER_OFFSET * l + KCS_TOKEN_OFFSET * tk
323
- inst_store_k_s.token = current_token
324
- binary_instruction = inst_store_k_s.to_binary(current_inst_cnt, 'STORE K scale')
325
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
326
- current_inst_cnt += 1
327
-
328
- if SIM_CMODE == 'mxint8':
329
- # MLP_WM Wq
330
- inst_mlp_wm_q.input_addr = WQ_BASE_ADDR + WQ_LAYER_OFFSET * l
331
- inst_mlp_wm_q.scale_addr = WQS_BASE_ADDR + WQS_LAYER_OFFSET * l
332
- inst_mlp_wm_q.token = current_token
333
- binary_instruction = inst_mlp_wm_q.to_binary(current_inst_cnt, 'MLP_WM Wq')
334
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
335
- current_inst_cnt += 1
336
- else:
337
- # MLP_WM Wq
338
- inst_mlp_wm_q_mxint4.input_addr = WQ_BASE_ADDR + (WQ_LAYER_OFFSET//2) * l
339
- inst_mlp_wm_q_mxint4.scale_addr = WQS_BASE_ADDR + WQS_LAYER_OFFSET * l
340
- inst_mlp_wm_q_mxint4.token = current_token
341
- binary_instruction = inst_mlp_wm_q_mxint4.to_binary(current_inst_cnt, 'MLP_WM Wq')
342
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
343
- current_inst_cnt += 1
344
-
345
- # ROPE NO_WB (Q)
346
- inst_rope_nwb.input_addr = ROPE_BASE_ADDR + ROPE_TOKEN_OFFSET * tk
347
- inst_rope_nwb.token = current_token
348
- binary_instruction = inst_rope_nwb.to_binary(current_inst_cnt, 'ROPE NO_WB (Q)')
349
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
350
- current_inst_cnt += 1
351
- # MLP_QKT
352
- inst_mlp_qkt.input_addr = KC_BASE_ADDR + KC_LAYER_OFFSET * l
353
- inst_mlp_qkt.scale_addr = KCS_BASE_ADDR + KCS_LAYER_OFFSET * l
354
- inst_mlp_qkt.output_dim = tmp_output_dim
355
- inst_mlp_qkt.input_dim = tmp_output_dim * inst_mlp_qkt.num_cb_ws
356
- inst_mlp_qkt.token = current_token
357
- binary_instruction = inst_mlp_qkt.to_binary(current_inst_cnt, 'MLP_QKT')
358
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
359
- current_inst_cnt += 1
360
- # QKT_M_RSQRT
361
- inst_qkt_m_rsqrt.output_dim = tmp_output_dim
362
- inst_qkt_m_rsqrt.input_dim = tmp_output_dim * inst_qkt_m_rsqrt.num_cb_ws
363
- inst_qkt_m_rsqrt.token = current_token
364
- binary_instruction = inst_qkt_m_rsqrt.to_binary(current_inst_cnt, 'QKT_M_RSQRT')
365
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
366
- current_inst_cnt += 1
367
- # SOFTMAX
368
- inst_softmax.output_dim = tmp_output_dim
369
- inst_softmax.token = current_token
370
- binary_instruction = inst_softmax.to_binary(current_inst_cnt, 'SOFTMAX')
371
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
372
- current_inst_cnt += 1
373
- # MLP_HP (SxV)
374
- inst_mlp_hp.input_addr = VC_BASE_ADDR + VC_LAYER_OFFSET * l
375
- inst_mlp_hp.scale_addr = VCS_BASE_ADDR + VCS_LAYER_OFFSET * l
376
- inst_mlp_hp.input_dim = tmp_output_dim * inst_mlp_hp.num_cb_ws
377
- inst_mlp_hp.token = current_token
378
- binary_instruction = inst_mlp_hp.to_binary(current_inst_cnt, 'MLP_HP (SxV)')
379
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
380
- current_inst_cnt += 1
381
-
382
- if SIM_CMODE == 'mxint8':
383
- # MLP_WM Wo
384
- inst_mlp_wm_o.input_addr = WO_BASE_ADDR + WO_LAYER_OFFSET * l
385
- inst_mlp_wm_o.scale_addr = WOS_BASE_ADDR + WOS_LAYER_OFFSET * l
386
- inst_mlp_wm_o.token = current_token
387
- binary_instruction = inst_mlp_wm_o.to_binary(current_inst_cnt, 'MLP_WM Wo')
388
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
389
- current_inst_cnt += 1
390
- else:
391
- # MLP_WM Wo
392
- inst_mlp_wm_o_mxint4.input_addr = WO_BASE_ADDR + (WO_LAYER_OFFSET//2) * l
393
- inst_mlp_wm_o_mxint4.scale_addr = WOS_BASE_ADDR + WOS_LAYER_OFFSET * l
394
- inst_mlp_wm_o_mxint4.token = current_token
395
- binary_instruction = inst_mlp_wm_o_mxint4.to_binary(current_inst_cnt, 'MLP_WM Wo')
396
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
397
- current_inst_cnt += 1
398
-
399
- # LOAD RESI
400
- inst_load_resi.input_addr = RESI_BASE_ADDR + RESI_TOKEN_OFFSET * tk
401
- inst_load_resi.token = current_token
402
- binary_instruction = inst_load_resi.to_binary(current_inst_cnt, 'LOAD RESI')
403
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
404
- current_inst_cnt += 1
405
- # LOAD RESI_S
406
- inst_load_resi_s.input_addr = RESIS_BASE_ADDR + RESIS_TOKEN_OFFSET * tk
407
- inst_load_resi_s.token = current_token
408
- binary_instruction = inst_load_resi_s.to_binary(current_inst_cnt, 'LOAD RESI_S')
409
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
410
- current_inst_cnt += 1
411
- # RESIDUAL
412
- inst_residual.token = current_token
413
- binary_instruction = inst_residual.to_binary(current_inst_cnt, 'RESIDUAL')
414
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
415
- current_inst_cnt += 1
416
- # STORE RESI
417
- inst_store_resi.output_addr = RESI_BASE_ADDR + RESI_TOKEN_OFFSET * tk
418
- inst_store_resi.token = current_token
419
- binary_instruction = inst_store_resi.to_binary(current_inst_cnt, 'STORE RESI')
420
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
421
- current_inst_cnt += 1
422
- # STORE RESI_S
423
- inst_store_resi_s.output_addr = RESIS_BASE_ADDR + RESIS_TOKEN_OFFSET * tk
424
- inst_store_resi_s.token = current_token
425
- binary_instruction = inst_store_resi_s.to_binary(current_inst_cnt, 'STORE RESI_S')
426
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
427
- current_inst_cnt += 1
428
- # RRMS
429
- inst_postrrms.token = current_token
430
- binary_instruction = inst_postrrms.to_binary(current_inst_cnt, 'POST RRMS')
431
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
432
- current_inst_cnt += 1
433
- # LOAD NORM weight
434
- inst_load_post_wm.input_addr = POSTNORM_ADDR + POSTNORM_LAYER_OFFSET * l
435
- inst_load_post_wm.token = current_token
436
- binary_instruction = inst_load_post_wm.to_binary(current_inst_cnt, 'LOAD POSTNORM weight')
437
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
438
- current_inst_cnt += 1
439
- # LOAD NORM weight scale
440
- inst_load_post_ws.input_addr = POSTNORMS_ADDR + POSTNORMS_LAYER_OFFSET * l
441
- inst_load_post_ws.token = current_token
442
- binary_instruction = inst_load_post_ws.to_binary(current_inst_cnt, 'LOAD POSTNORM weight scale')
443
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
444
- current_inst_cnt += 1
445
- # RMSNORM
446
- inst_postrmsnorm.token = current_token
447
- binary_instruction = inst_postrmsnorm.to_binary(current_inst_cnt, 'POST RMSNORM')
448
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
449
- current_inst_cnt += 1
450
-
451
- if SIM_CMODE == 'mxint8':
452
- # MLP_WM W1
453
- inst_mlp_wm_w1.input_addr = W1_BASE_ADDR + W1_LAYER_OFFSET * l
454
- inst_mlp_wm_w1.scale_addr = W1S_BASE_ADDR + W1S_LAYER_OFFSET * l
455
- inst_mlp_wm_w1.token = current_token
456
- binary_instruction = inst_mlp_wm_w1.to_binary(current_inst_cnt, 'MLP_WM W1')
457
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
458
- current_inst_cnt += 1
459
- # MLP_WM W3
460
- inst_mlp_wm_w3.input_addr = W3_BASE_ADDR + W3_LAYER_OFFSET * l
461
- inst_mlp_wm_w3.scale_addr = W3S_BASE_ADDR + W3S_LAYER_OFFSET * l
462
- inst_mlp_wm_w3.token = current_token
463
- binary_instruction = inst_mlp_wm_w3.to_binary(current_inst_cnt, 'MLP_WM W3')
464
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
465
- current_inst_cnt += 1
466
- else:
467
- # MLP_WM W1
468
- inst_mlp_wm_w1_mxint4.input_addr = W1_BASE_ADDR + (W1_LAYER_OFFSET//2) * l
469
- inst_mlp_wm_w1_mxint4.scale_addr = W1S_BASE_ADDR + W1S_LAYER_OFFSET * l
470
- inst_mlp_wm_w1_mxint4.token = current_token
471
- binary_instruction = inst_mlp_wm_w1_mxint4.to_binary(current_inst_cnt, 'MLP_WM W1')
472
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
473
- current_inst_cnt += 1
474
- # MLP_WM W3
475
- inst_mlp_wm_w3_mxint4.input_addr = W3_BASE_ADDR + (W3_LAYER_OFFSET//2) * l
476
- inst_mlp_wm_w3_mxint4.scale_addr = W3S_BASE_ADDR + W3S_LAYER_OFFSET * l
477
- inst_mlp_wm_w3_mxint4.token = current_token
478
- binary_instruction = inst_mlp_wm_w3_mxint4.to_binary(current_inst_cnt, 'MLP_WM W3')
479
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
480
- current_inst_cnt += 1
481
-
482
- # GATE
483
- inst_gate.token = current_token
484
- binary_instruction = inst_gate.to_binary(current_inst_cnt, 'GATE')
485
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
486
- current_inst_cnt += 1
487
-
488
- if SIM_CMODE == 'mxint8':
489
- # MLP_WM W2
490
- inst_mlp_wm_w2.input_addr = W2_BASE_ADDR + W2_LAYER_OFFSET * l
491
- inst_mlp_wm_w2.scale_addr = W2S_BASE_ADDR + W2S_LAYER_OFFSET * l
492
- inst_mlp_wm_w2.token = current_token
493
- binary_instruction = inst_mlp_wm_w2.to_binary(current_inst_cnt, 'MLP_WM W2')
494
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
495
- current_inst_cnt += 1
496
- else:
497
- # MLP_WM W2
498
- inst_mlp_wm_w2_mxint4.input_addr = W2_BASE_ADDR + (W2_LAYER_OFFSET//2) * l
499
- inst_mlp_wm_w2_mxint4.scale_addr = W2S_BASE_ADDR + W2S_LAYER_OFFSET * l
500
- inst_mlp_wm_w2_mxint4.token = current_token
501
- binary_instruction = inst_mlp_wm_w2_mxint4.to_binary(current_inst_cnt, 'MLP_WM W2')
502
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
503
- current_inst_cnt += 1
504
-
505
- # LOAD RESI
506
- inst_load_resi.input_addr = RESI_BASE_ADDR + RESI_TOKEN_OFFSET * tk
507
- inst_load_resi.token = current_token
508
- binary_instruction = inst_load_resi.to_binary(current_inst_cnt, 'LOAD RESI')
509
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
510
- current_inst_cnt += 1
511
- # LOAD RESI_S
512
- inst_load_resi_s.input_addr = RESIS_BASE_ADDR + RESIS_TOKEN_OFFSET * tk
513
- inst_load_resi_s.token = current_token
514
- binary_instruction = inst_load_resi_s.to_binary(current_inst_cnt, 'LOAD RESI_S')
515
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
516
- current_inst_cnt += 1
517
- # RESIDUAL
518
- inst_residual.token = current_token
519
- binary_instruction = inst_residual.to_binary(current_inst_cnt, 'RESIDUAL')
520
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
521
- current_inst_cnt += 1
522
- # STORE RESI
523
- inst_store_resi.output_addr = RESI_BASE_ADDR + RESI_TOKEN_OFFSET * tk
524
- inst_store_resi.token = current_token
525
- binary_instruction = inst_store_resi.to_binary(current_inst_cnt, 'STORE RESI')
526
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
527
- current_inst_cnt += 1
528
- # STORE RESI_S
529
- inst_store_resi_s.output_addr = RESIS_BASE_ADDR + RESIS_TOKEN_OFFSET * tk
530
- inst_store_resi_s.token = current_token
531
- binary_instruction = inst_store_resi_s.to_binary(current_inst_cnt, 'STORE RESI_S')
532
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
533
- current_inst_cnt += 1
534
- # END OF LAYER
535
-
536
- # if tk == SIM_NUM_TOKEN-1 and SIM_NUM_LAYER == 32 and SIM_LLM_HEAD:
537
- if SIM_NUM_LAYER == 32 and SIM_LLM_HEAD:
538
- # RRMS
539
- inst_outrrms.token = current_token
540
- binary_instruction = inst_outrrms.to_binary(current_inst_cnt, 'OUT RRMS')
541
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
542
- current_inst_cnt += 1
543
- # LOAD OUTNORM weight
544
- inst_load_out_wm.input_addr = OUTNORM_ADDR
545
- inst_load_out_wm.token = current_token
546
- binary_instruction = inst_load_out_wm.to_binary(current_inst_cnt, 'LOAD OUTNORM weight')
547
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
548
- current_inst_cnt += 1
549
- # LOAD OUTNORM weight scale
550
- inst_load_out_ws.input_addr = OUTNORMS_ADDR
551
- inst_load_out_ws.token = current_token
552
- binary_instruction = inst_load_out_ws.to_binary(current_inst_cnt, 'LOAD OUTNORM weight scale')
553
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
554
- current_inst_cnt += 1
555
- # OUT RMSNORM
556
- inst_outrmsnorm.token = current_token
557
- binary_instruction = inst_outrmsnorm.to_binary(current_inst_cnt, 'OUT RMSNORM')
558
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
559
- current_inst_cnt += 1
560
-
561
- for it in range(8):
562
- if SIM_CMODE == 'mxint8':
563
- # MLP_WM Whead
564
- inst_mlp_wm_whead.input_addr = WHEAD_BASE_ADDR + WHEAD_LAYER_OFFSET * it
565
- inst_mlp_wm_whead.scale_addr = WHEADS_BASE_ADDR + WHEADS_LAYER_OFFSET * it
566
- inst_mlp_wm_whead.token = current_token
567
- binary_instruction = inst_mlp_wm_whead.to_binary(current_inst_cnt, 'MLP_WM Whead')
568
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
569
- current_inst_cnt += 1
570
- else:
571
- # MLP_WM Whead
572
- inst_mlp_wm_whead_mxint4.input_addr = WHEAD_BASE_ADDR + (WHEAD_LAYER_OFFSET//2) * it
573
- inst_mlp_wm_whead_mxint4.scale_addr = WHEADS_BASE_ADDR + WHEADS_LAYER_OFFSET * it
574
- inst_mlp_wm_whead_mxint4.token = current_token
575
- binary_instruction = inst_mlp_wm_whead_mxint4.to_binary(current_inst_cnt, 'MLP_WM Whead')
576
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
577
- current_inst_cnt += 1
578
-
579
- # STORE HEAD
580
- inst_store_head.output_addr = HEAD_OUT_BASE_ADDR + HEAD_OUT_LAYER_OFFSET * it
581
- inst_store_head.token = current_token
582
- binary_instruction = inst_store_head.to_binary(current_inst_cnt, 'STORE HEAD')
583
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
584
- current_inst_cnt += 1
585
- # STORE HEAD_S
586
- inst_store_head_s.output_addr = HEADS_OUT_BASE_ADDR + HEADS_OUT_LAYER_OFFSET * it
587
- inst_store_head_s.token = current_token
588
- binary_instruction = inst_store_head_s.to_binary(current_inst_cnt, 'STORE HEAD_S')
589
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
590
- current_inst_cnt += 1
591
-
592
-
593
- # Adjust parameters
594
- tmp_output_dim += 1
595
- current_token += 1
596
- # END OF TOKEN
597
-
598
- # end instruction NOP
599
- binary_instruction = inst_nop.to_binary(current_inst_cnt, 'End instruction NOP')
600
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
601
- current_inst_cnt += 1
602
- # END OF SIMULATION
603
- print('INFO: Total Instruction Count: {}'.format(current_inst_cnt))
604
-
605
- elif TEST_OP_GROUP == 'test_bw':
606
- current_inst_cnt = 0
607
- with open("../instruction/instruction_test_bw.bin", "wb") as f:
608
- for i in range(SIM_NUM_TOKEN):
609
- binary_instruction = inst_test_bw.to_binary(current_inst_cnt, 'TEST BW')
610
- f.write(int(binary_instruction, 2).to_bytes(len(binary_instruction) // 8, byteorder='little'))
611
- current_inst_cnt += 1
612
- print('INFO: Total Instruction Count: {}'.format(current_inst_cnt))