radames HF staff commited on
Commit
c7eecb3
1 Parent(s): 0c1c7c9
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.gif filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://github.com/modularml/mojo/blob/main/examples/docker/Dockerfile.mojosdk
2
+ # ===----------------------------------------------------------------------=== #
3
+ # Copyright (c) 2023, Modular Inc. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License v2.0 with LLVM Exceptions:
6
+ # https://llvm.org/LICENSE.txt
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ===----------------------------------------------------------------------=== #
14
+
15
+ # Example command line:
16
+ # Use no-cache to force docker to rebuild layers of the image by downloading the SDK from the repos
17
+ # docker build --no-cache \
18
+ # --build-arg AUTH_KEY=<your-modular-auth-key> \
19
+ # --pull -t modular/mojo-v0.2-`date '+%Y%d%m-%H%M'` \
20
+ # --file Dockerfile.mojosdk .
21
+
22
FROM ubuntu:20.04

# Time zone used while tzdata is configured during the package install.
# (The ARG was previously misspelled DEFAUL_TZ, so DEFAULT_TZ was always empty.)
ARG DEFAULT_TZ=America/Los_Angeles
ENV DEFAULT_TZ=$DEFAULT_TZ
ARG MODULAR_HOME=/home/user/.modular
ENV MODULAR_HOME=$MODULAR_HOME

# TZ must be passed as an environment assignment; a bare "$DEFAULT_TZ" in front
# of apt-get would be executed as the command name once the variable is set.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive TZ=$DEFAULT_TZ apt-get install -y \
        tzdata \
        vim \
        sudo \
        curl \
        python3 \
        pip \
        wget \
    && python3 -m pip install \
        jupyterlab \
        ipykernel \
        matplotlib \
        ipywidgets \
        gradio

RUN curl -fsSL https://repo.anaconda.com/miniconda/Miniconda3-py38_23.5.2-0-Linux-x86_64.sh > /tmp/miniconda.sh \
    && chmod +x /tmp/miniconda.sh \
    && /tmp/miniconda.sh -b -p /opt/conda

# NOTE(review): copying AUTH_KEY into ENV stores the key in the image metadata.
# Kept for compatibility; confirm whether anything needs it at runtime.
ARG AUTH_KEY=DEFAULT_KEY
ENV AUTH_KEY=$AUTH_KEY

RUN curl https://get.modular.com | MODULAR_AUTH=$AUTH_KEY sh - \
    && modular install mojo

RUN useradd -m -u 1000 user
RUN chown -R user $MODULAR_HOME

ENV PATH="$PATH:/opt/conda/bin:$MODULAR_HOME/pkg/packages.modular.com_mojo/bin"

RUN conda init
RUN pip install gradio

USER user
# Dockerfile variable substitution only sees ENV/ARG values, so HOME has to be
# declared here; without it "$HOME/app" below expands to "/app".
ENV HOME=/home/user
WORKDIR $HOME/app

COPY --chown=user . $HOME/app
RUN wget -c https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin

# CMD ["mojo", "llama2.mojo"]
CMD ["python3", "gradio_app.py"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Aydyn Tairov
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
assets/llama2.mojo-demo.gif ADDED

Git LFS Details

  • SHA256: 6801c1dca3aacf420459f4c7e20820a7a90dd75cc46f7347b18c87aa913fa7d6
  • Pointer size: 132 Bytes
  • Size of remote file: 2.49 MB
gradio_app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import subprocess
3
+ import sys
4
+ import os
5
+
6
+
7
async def generate(prompt=None):
    """Run ``mojo llama2.mojo`` and stream its standard output.

    Args:
        prompt: Currently unused. It defaults to ``None`` so the function can
            be called with no arguments, which is what an interface built with
            ``inputs=None`` does.

    Yields:
        The text accumulated so far, once for every newly decoded piece of
        output, so a UI can show the generation progressively.
    """
    import asyncio
    import codecs

    # stderr is discarded instead of piped: nothing ever read the pipe, so a
    # chatty child could fill it and block forever.
    process = subprocess.Popen(
        ["mojo", "llama2.mojo"],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    # Bytes are read one at a time, so a multi-byte UTF-8 character arrives in
    # pieces; the incremental decoder buffers them until the character is whole.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    loop = asyncio.get_running_loop()
    text = ""
    try:
        while True:
            # The blocking read runs in the default executor so the event loop
            # stays free while the model is producing output.
            chunk = await loop.run_in_executor(None, process.stdout.read, 1)
            piece = decoder.decode(chunk, final=not chunk)
            if piece:
                sys.stdout.write(piece)
                text += piece
                yield text
            if not chunk:  # EOF
                break
    finally:
        # Reap the child; kill it first if the consumer stopped early.
        if process.poll() is None:
            process.kill()
        process.wait()
19
+
20
+
21
# Single read-only text box that receives the streamed generation.
output_text = gr.Textbox(label="Generated Text")

# Markdown shown above the interface.
_DESCRIPTION = """
# llama2.🔥
## [Mojo](https://docs.modular.com/mojo/) implementation of [llama2.c](https://github.com/karpathy/llama2.c) by [@tairov](https://github.com/tairov)
Source: https://github.com/tairov/llama2.mojo
"""

# The interface has no input components; `generate` produces the output text.
demo = gr.Interface(
    fn=generate,
    inputs=None,
    outputs=output_text,
    description=_DESCRIPTION,
    allow_flagging="never",
)

# Enable the queue before launching, then listen on all network interfaces.
demo.queue()
demo.launch(server_name="0.0.0.0")
llama2.mojo ADDED
@@ -0,0 +1,747 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from math import round
2
+ import math
3
+
4
+ from memory import memset_zero, memcpy
5
+ from memory.unsafe import DTypePointer
6
+ from random import rand
7
+ from sys.info import simdwidthof
8
+ from builtin import string
9
+ import time
10
+ import random
11
+ import os
12
+
13
+ from runtime.llcl import num_cores
14
+
15
+ from read import BufReader, File
16
+ from memory.buffer import Buffer
17
+
18
+ from python import Python
19
+
20
+ # The SIMD vector width.
21
+ from algorithm import vectorize, parallelize
22
+ from algorithm import sum
23
+
24
+ alias nelts = (2 * simdwidthof[DType.float32]())
25
+
26
+ alias PointerString = Pointer[UInt8]
27
+ alias BufferPtrType = DTypePointer[DType.uint8]
28
+ alias BufferPtrFloat32 = DTypePointer[DType.float32]
29
+ alias PointerStrings = Pointer[PointerString]
30
+
31
+
32
struct Matrix3:
    # Float32 tensor addressed as (layer, row, col) over one flat buffer.
    # `allocated` is 1 only when `alloc` obtained the buffer; only then does
    # `__del__` free it. A pointer installed with `set_buf_ptr` is never freed
    # by this struct.
    var data: BufferPtrFloat32
    var rows: Int
    var cols: Int
    var layers: Int
    var allocated: Int

    fn __init__(inout self, layers: Int, rows: Int, cols: Int):
        # Records the shape only; storage is attached later by `alloc` or
        # `set_buf_ptr`.
        self.data = BufferPtrFloat32.alloc(0)
        self.rows = rows
        self.cols = cols
        self.layers = layers
        self.allocated = 0

    @always_inline
    fn alloc(inout self, fill: Int = 0):
        # Allocates layers * rows * cols floats; zero-fills them when fill == 1.
        self.data = BufferPtrFloat32.alloc(self.size())
        self.allocated = 1
        if fill == 1:
            self.zero()

    @always_inline
    fn alloc_zero(inout self):
        # Allocates the storage and zero-fills it.
        self.alloc(1)

    @always_inline
    fn set_buf_ptr(inout self, ptr: BufferPtrFloat32):
        # Points the tensor at externally provided storage.
        self.data = ptr

    fn __del__(owned self):
        if self.allocated == 1:
            self.data.free()

    @always_inline
    fn zero(inout self):
        # Sets every element to 0.
        memset_zero(self.data, self.layers * self.rows * self.cols)

    @always_inline
    fn size(inout self) -> Int:
        # Total number of elements.
        return self.layers * self.cols * self.rows

    @always_inline
    fn _offset(self, z: Int, y: Int, x: Int) -> Int:
        # Flat index of element (z, y, x). One layer spans rows * cols
        # elements, so the layer stride is rows * cols (the previous code
        # multiplied z by the layer count, which addressed the wrong element).
        return (z * self.rows + y) * self.cols + x

    @always_inline
    fn __getitem__(self, z: Int, y: Int, x: Int) -> Float32:
        return self.load[1](z, y, x)

    @always_inline
    fn load[nelts: Int](self, z: Int, y: Int, x: Int) -> SIMD[DType.float32, nelts]:
        # Loads `nelts` consecutive floats starting at element (z, y, x).
        return self.data.simd_load[nelts](self._offset(z, y, x))

    @always_inline
    fn __setitem__(self, z: Int, y: Int, x: Int, val: Float32):
        return self.store[1](z, y, x, val)

    @always_inline
    fn store[nelts: Int](self, z: Int, y: Int, x: Int, val: SIMD[DType.float32, nelts]):
        # Stores `nelts` consecutive floats starting at element (z, y, x).
        self.data.simd_store[nelts](self._offset(z, y, x), val)
88
+
89
+
90
struct Matrix:
    # Row-major float32 matrix (or a vector when rows == 1) over a flat buffer.
    # `allocated` is 1 only when `alloc` obtained the buffer; only then does
    # `__del__` free it.
    var data: BufferPtrFloat32
    var rows: Int
    var cols: Int
    var allocated: Int

    fn __init__(inout self, rows: Int, cols: Int):
        # Shape only; no element storage is attached yet.
        self.rows = rows
        self.cols = cols
        self.allocated = 0
        self.data = BufferPtrFloat32.alloc(0)

    fn __init__(inout self, cols: Int):
        # Vector form: a single row with `cols` entries.
        self.rows = 1
        self.cols = cols
        self.allocated = 0
        self.data = BufferPtrFloat32.alloc(0)

    fn __del__(owned self):
        if self.allocated == 1:
            self.data.free()

    fn alloc(inout self, fill: Int = 0):
        # Allocates rows * cols floats; zero-fills them when fill == 1.
        self.data = BufferPtrFloat32.alloc(self.size())
        self.allocated = 1
        if fill == 1:
            self.zero()

    fn alloc_zero(inout self):
        # Allocates the storage and zero-fills it.
        self.alloc(1)

    fn zero(inout self):
        # Sets every element to 0.
        memset_zero(self.data, self.rows * self.cols)

    fn set_buf_ptr(inout self, ptr: BufferPtrFloat32):
        # Points the matrix at externally provided storage, keeping the shape.
        self.data = ptr

    fn set_buf_ptr(inout self, ptr: BufferPtrFloat32, rows: Int, cols: Int):
        # Points the matrix at externally provided storage and redefines its
        # rows and cols.
        self.rows = rows
        self.cols = cols
        self.data = ptr

    fn size(inout self) -> Int:
        # Total number of elements.
        return self.cols * self.rows

    # --- scalar access ---------------------------------------------------

    @always_inline
    fn __getitem__(self, y: Int, x: Int) -> Float32:
        return self.data.simd_load[1](y * self.cols + x)

    @always_inline
    fn __getitem__(self, x: Int) -> Float32:
        # Single-index form reads from row 0.
        return self.data.simd_load[1](0 * self.cols + x)

    @always_inline
    fn __setitem__(self, y: Int, x: Int, val: Float32):
        self.data.simd_store[1](y * self.cols + x, val)

    @always_inline
    fn __setitem__(self, x: Int, val: Float32):
        # Single-index form writes to row 0.
        self.data.simd_store[1](0 * self.cols + x, val)

    # --- SIMD access -----------------------------------------------------

    @always_inline
    fn load[nelts: Int](self, y: Int, x: Int) -> SIMD[DType.float32, nelts]:
        # Loads `nelts` consecutive floats starting at (y, x).
        let flat = y * self.cols + x
        return self.data.simd_load[nelts](flat)

    @always_inline
    fn store[nelts: Int](self, y: Int, x: Int, val: SIMD[DType.float32, nelts]):
        # Stores `nelts` consecutive floats starting at (y, x).
        let flat = y * self.cols + x
        self.data.simd_store[nelts](flat, val)

    @always_inline
    fn load[nelts: Int](self, x: Int) -> SIMD[DType.float32, nelts]:
        # Loads `nelts` consecutive floats starting at flat index x.
        return self.data.simd_load[nelts](x)

    @always_inline
    fn store[nelts: Int](self, x: Int, val: SIMD[DType.float32, nelts]):
        # Stores `nelts` consecutive floats starting at flat index x.
        self.data.simd_store[nelts](x, val)
167
+
168
+
169
fn read_val_int(inout buf: FileBuf) -> Int:
    # Reads the 4 bytes at the buffer cursor as one signed 32-bit integer and
    # advances the cursor by 4.
    #
    # The value is read as int32 rather than uint32: `main` checks for a
    # negative vocab_size, and an unsigned read could never produce one.
    let data = buf.data.offset(buf.offset).bitcast[DType.int32]()
    let result = data.simd_load[1](0)
    buf.offset += 4
    return result.to_int()
175
+
176
+
177
fn read_val_float32(inout buf: FileBuf) -> Float32:
    # Reads the 4 bytes at the buffer cursor as one float32 and advances the
    # cursor by 4.
    let as_floats = buf.data.offset(buf.offset).bitcast[DType.float32]()
    let value = as_floats.simd_load[1](0)
    buf.offset += 4
    return value
182
+
183
+
184
fn read_val_str(inout buf: FileBuf, slen: Int) -> PointerString:
    # Copies `slen` bytes from the buffer cursor into a newly allocated,
    # null-terminated byte string and advances the cursor past them.
    # The returned pointer is a fresh allocation of slen + 1 bytes.
    let out = PointerString.alloc(slen + 1)
    var i = 0
    while i < slen:
        out.store(i, buf.data.simd_load[1](buf.offset))
        buf.offset += 1
        i += 1
    # Terminator, so the bytes can be printed up to the first 0.
    out.store(slen, 0)

    return out
192
+
193
+
194
struct FileBuf:
    # Byte buffer with a read cursor.
    #   data   - pointer to the first byte
    #   offset - cursor position in bytes
    #   size   - buffer length in bytes
    var data: BufferPtrType
    var offset: Int
    var size: Int

    fn __init__(inout self):
        # Starts empty: default-constructed pointer, cursor and size at 0.
        self.size = 0
        self.offset = 0
        self.data = BufferPtrType()

    fn move_offset(inout self, size: Int):
        # Advances the cursor by `size` bytes.
        self.offset += size

    fn bitcast_offset_float32(inout self, size: Int) -> BufferPtrFloat32:
        # Returns the cursor position viewed as float32 data, then advances the
        # cursor past `size` floats.
        let view = self.data.offset(self.offset).bitcast[DType.float32]()
        self.move_offset(size * sizeof[DType.float32]())
        return view
211
+
212
+
213
struct Tokenizer:
    # Vocabulary storage: one byte-string pointer and one float32 score per
    # token, plus the maximum token length.
    var vocab: PointerStrings
    var vocab_scores: BufferPtrFloat32
    var max_token_length: Int
    var vocab_size: Int

    fn __init__(inout self, vocab_size: Int):
        # Allocates room for `vocab_size` token pointers and scores; their
        # contents are not initialised here.
        self.max_token_length = 0
        self.vocab_size = vocab_size
        self.vocab_scores = BufferPtrFloat32.alloc(vocab_size)
        self.vocab = PointerStrings.alloc(vocab_size)
224
+
225
+
226
struct Config:
    # Model hyper-parameters; filled from the checkpoint by `config_init`.
    var dim: Int
    var hidden_dim: Int
    var n_layers: Int
    var n_heads: Int
    var n_kv_heads: Int
    var vocab_size: Int
    var seq_len: Int

    fn __init__(inout self):
        # Every field starts at 0.
        self.seq_len = 0
        self.vocab_size = 0
        self.n_kv_heads = 0
        self.n_heads = 0
        self.n_layers = 0
        self.hidden_dim = 0
        self.dim = 0
243
+
244
+
245
struct RunState:
    # Scratch buffers reused across forward passes.
    var x: Matrix  # activation at current time stamp (dim,)
    var xb: Matrix  # same, but inside a residual branch (dim,)
    var xb2: Matrix  # an additional buffer just for convenience (dim,)
    var hb: Matrix  # buffer for hidden dimension in the ffn (hidden_dim,)
    var hb2: Matrix  # buffer for hidden dimension in the ffn (hidden_dim,)
    var q: Matrix  # query (dim,)
    var k: Matrix  # key (dim,)
    var v: Matrix  # value (dim,)
    var att: Matrix  # buffer for scores/attention values (n_heads, seq_len)
    var logits: Matrix  # output logits
    var key_cache: Matrix3  # (layer, seq_len, dim)
    var value_cache: Matrix3  # (layer, seq_len, dim)

    fn __init__(inout self, config: Config):
        # Sizes every buffer from `config` and allocates it zero-filled.
        let dim = config.dim
        let hidden_dim = config.hidden_dim
        let seq_len = config.seq_len
        let n_layers = config.n_layers

        # Vectors of length dim.
        self.x = Matrix(dim)
        self.x.alloc_zero()
        self.xb = Matrix(dim)
        self.xb.alloc_zero()
        self.xb2 = Matrix(dim)
        self.xb2.alloc_zero()

        # Vectors of length hidden_dim.
        self.hb = Matrix(hidden_dim)
        self.hb.alloc_zero()
        self.hb2 = Matrix(hidden_dim)
        self.hb2.alloc_zero()

        # Query / key / value for the current position.
        self.q = Matrix(dim)
        self.q.alloc_zero()
        self.k = Matrix(dim)
        self.k.alloc_zero()
        self.v = Matrix(dim)
        self.v.alloc_zero()

        # One row of attention scores per head.
        self.att = Matrix(config.n_heads, seq_len)
        self.att.alloc_zero()

        self.logits = Matrix(config.vocab_size)
        self.logits.alloc_zero()

        # Key / value caches.
        self.key_cache = Matrix3(n_layers, seq_len, dim)
        self.key_cache.alloc_zero()
        self.value_cache = Matrix3(n_layers, seq_len, dim)
        self.value_cache.alloc_zero()
284
+
285
+
286
struct TransformerWeights:
    # Views into the checkpoint buffer; no weight data is copied.
    var token_embedding_table: Matrix
    var freq_cis_real: Matrix
    var freq_cis_imag: Matrix
    var rms_att_weight: Matrix
    var wq: Matrix3
    var wk: Matrix3
    var wv: Matrix3
    var wo: Matrix3
    var rms_ffn_weight: Matrix
    var w1: Matrix3
    var w3: Matrix3
    var w2: Matrix3
    var rms_final_weight: Matrix
    var wcls: Matrix

    fn __init__(inout self, config: Config, shared_weights: Int, inout buf: FileBuf):
        # Carves consecutive float32 regions out of `buf`, starting at its
        # current cursor, in the order the statements appear below; each call
        # to bitcast_offset_float32 advances the cursor past the region it
        # returns. `shared_weights` is not consulted: the classifier always
        # reuses the token embedding table.
        let dim = config.dim
        let hidden_dim = config.hidden_dim
        let n_layers = config.n_layers
        let vocab_size = config.vocab_size
        let seq_len = config.seq_len
        let half_head = (dim // config.n_heads) // 2

        self.token_embedding_table = Matrix(vocab_size, dim)
        self.token_embedding_table.set_buf_ptr(
            buf.bitcast_offset_float32(self.token_embedding_table.size())
        )

        self.rms_att_weight = Matrix(n_layers, dim)
        self.rms_att_weight.set_buf_ptr(
            buf.bitcast_offset_float32(self.rms_att_weight.size())
        )

        # Attention projections, one (dim x dim) slab per layer.
        self.wq = Matrix3(n_layers, dim, dim)
        self.wq.set_buf_ptr(buf.bitcast_offset_float32(self.wq.size()))
        self.wk = Matrix3(n_layers, dim, dim)
        self.wk.set_buf_ptr(buf.bitcast_offset_float32(self.wk.size()))
        self.wv = Matrix3(n_layers, dim, dim)
        self.wv.set_buf_ptr(buf.bitcast_offset_float32(self.wv.size()))
        self.wo = Matrix3(n_layers, dim, dim)
        self.wo.set_buf_ptr(buf.bitcast_offset_float32(self.wo.size()))

        self.rms_ffn_weight = Matrix(n_layers, dim)
        self.rms_ffn_weight.set_buf_ptr(
            buf.bitcast_offset_float32(self.rms_ffn_weight.size())
        )

        # Feed-forward weights; the buffer order is w1, w2, w3.
        self.w1 = Matrix3(n_layers, dim, hidden_dim)
        self.w1.set_buf_ptr(buf.bitcast_offset_float32(self.w1.size()))
        self.w2 = Matrix3(n_layers, dim, hidden_dim)
        self.w2.set_buf_ptr(buf.bitcast_offset_float32(self.w2.size()))
        self.w3 = Matrix3(n_layers, dim, hidden_dim)
        self.w3.set_buf_ptr(buf.bitcast_offset_float32(self.w3.size()))

        self.rms_final_weight = Matrix(dim)
        self.rms_final_weight.set_buf_ptr(
            buf.bitcast_offset_float32(self.rms_final_weight.size())
        )

        # Rotary tables: seq_len rows of head_size / 2 entries.
        self.freq_cis_real = Matrix(seq_len, half_head)
        self.freq_cis_real.set_buf_ptr(
            buf.bitcast_offset_float32(self.freq_cis_real.size())
        )
        self.freq_cis_imag = Matrix(seq_len, half_head)
        self.freq_cis_imag.set_buf_ptr(
            buf.bitcast_offset_float32(self.freq_cis_imag.size())
        )

        # if shared_weights else rest_floats
        self.wcls = Matrix(vocab_size, dim)
        self.wcls.set_buf_ptr(self.token_embedding_table.data)
346
+
347
+
348
fn read_file(file_name: String, inout buf: FileBuf) raises:
    # Loads the whole file `file_name` into a newly allocated byte buffer and
    # stores it in `buf` with the cursor reset to 0.
    # The file size is obtained from Python's os.path.getsize.
    let py_os = Python.import_module("os")
    let size_obj = py_os.path.getsize(file_name)
    let total_bytes = string.atol(size_obj.to_string())
    let storage: BufferPtrType = BufferPtrType.alloc(total_bytes)

    let f = File(file_name)
    var reader = BufReader[4096](f ^)
    var filled = 0
    var bytes_read = 1

    # Read through a 4096-byte window that slides over `storage` until the
    # reader reports no more bytes.
    while bytes_read > 0:
        let window = Buffer[4096, DType.uint8](storage.offset(filled))
        bytes_read = reader.read(window)
        filled += bytes_read
    reader.do_nothing()  # keeps lifetimes working

    buf.data = storage
    buf.size = total_bytes
    buf.offset = 0
368
+
369
+
370
fn config_init(inout config: Config, inout buf: FileBuf) raises:
    # Fills `config` from seven consecutive integers at the buffer cursor.
    # The read order is fixed by the data layout: dim, hidden_dim, n_layers,
    # n_heads, n_kv_heads, vocab_size, seq_len.
    let dim = read_val_int(buf)
    let hidden_dim = read_val_int(buf)
    let n_layers = read_val_int(buf)
    let n_heads = read_val_int(buf)
    let n_kv_heads = read_val_int(buf)
    let vocab_size = read_val_int(buf)
    let seq_len = read_val_int(buf)

    config.dim = dim
    config.hidden_dim = hidden_dim
    config.n_layers = n_layers
    config.n_heads = n_heads
    config.n_kv_heads = n_kv_heads
    config.vocab_size = vocab_size
    config.seq_len = seq_len
379
+
380
+
381
fn tokenizer_init(inout tok: Tokenizer, inout buf: FileBuf) -> None:
    # Fills `tok` from the tokenizer data in `buf`: first max_token_length,
    # then for each of the tok.vocab_size tokens a float32 score, an integer
    # byte length, and that many token bytes.
    #
    # `tok.vocab` and `tok.vocab_scores` are written in place. They were
    # already allocated with vocab_size entries by Tokenizer.__init__, so
    # allocating them again here only leaked the first pair. The previous
    # trailing reassignment of vocab_scores to the position after the parsed
    # data is removed as well, because it discarded the scores just read.
    tok.max_token_length = read_val_int(buf)

    # read vocab_scores & vocab values (tokens)
    for i in range(0, tok.vocab_size):
        tok.vocab_scores.simd_store[1](i, read_val_float32(buf))
        let slen = read_val_int(buf)
        tok.vocab.store(i, read_val_str(buf, slen))
    return None
395
+
396
+
397
fn accum(inout a: BufferPtrFloat32, b: BufferPtrFloat32, size: Int) -> None:
    # In-place element-wise add: a[i] += b[i] for the first `size` elements.
    for idx in range(size):
        a.simd_store[1](idx, a.simd_load[1](idx) + b.simd_load[1](idx))
401
+
402
+
403
fn rmsnorm(
    inout o: BufferPtrFloat32, x: BufferPtrFloat32, weight: BufferPtrFloat32, size: Int
) -> None:
    # RMS normalisation: o[j] = weight[j] * x[j] / sqrt(mean(x^2) + 1e-5)
    # over the first `size` elements. `o` may be the same buffer as `x`
    # (each output element depends only on the input element at its index
    # and on the scale computed beforehand).
    var sum_sq: Float32 = 0.0
    for i in range(size):
        sum_sq += x.simd_load[1](i) ** 2

    # Mean of squares plus epsilon, then its reciprocal square root.
    var scale: Float32 = sum_sq / size + 1e-5
    scale = 1.0 / math.sqrt(scale)

    # Normalize and scale
    for j in range(size):
        o.simd_store[1](j, weight.simd_load[1](j) * (scale * x.simd_load[1](j)))
417
+
418
+
419
fn softmax(inout x: BufferPtrFloat32, size: Int) -> None:
    # In-place softmax over the first `size` elements of `x`.
    # The maximum is subtracted before exponentiating, for numerical stability.
    var peak: Float32 = x.simd_load[1](0)
    for i in range(size):
        let candidate = x.simd_load[1](i)
        if candidate > peak:
            peak = candidate

    # Exponentiate in place while accumulating the normaliser.
    var total: Float32 = 0.0
    for i in range(size):
        let e = math.exp(x.simd_load[1](i) - peak)
        x.simd_store[1](i, e)
        total += e

    # Divide through so the values sum to 1.
    for i in range(size):
        x.simd_store[1](i, x.simd_load[1](i) / total)
436
+
437
+
438
fn matmul_naive(C: Matrix, x: Matrix, w: Matrix) -> None:
    # Reference matrix-vector product: W(d,n) @ X(n,) -> C (d,)
    # Plain scalar loops, accumulating directly into C.
    let n_rows = w.rows
    let n_cols = w.cols
    for row in range(n_rows):
        C[row] = 0.0
        for col in range(n_cols):
            C[row] = C[row] + x[col] * w[row, col]
445
+
446
+
447
fn matmul_vectorized(C: Matrix, A: Matrix, B: Matrix):
    # Matrix-vector product C[i] = dot(A, B[i, :]) for every row i of B,
    # computed `nelts` lanes at a time.
    for row in range(0, B.rows):
        # Lane-wise partial sums for this row.
        var acc = SIMD[DType.float32, nelts](0)

        @parameter
        fn accumulate[_nelts: Int](col: Int):
            if _nelts < nelts:  # take care of tail array elements with length < nelts
                # A narrower tail chunk is reduced to a scalar and added to lane 0.
                acc[0] += (A.load[_nelts](col) * B.load[_nelts](row, col)).reduce_add()
            else:
                acc += A.load[nelts](col) * B.load[nelts](row, col)

        vectorize[nelts, accumulate](B.cols)
        # Sum the lanes to obtain the final dot product.
        C[row] = acc.reduce_add()
460
+
461
fn matmul_parallelized(C: Matrix, A: Matrix, B: Matrix):
    # Matrix-vector product C[i] = dot(A, B[i, :]); rows are handed to
    # `parallelize`, and each row is accumulated `nelts` lanes at a time.
    @parameter
    fn calc_row(i: Int):
        # Per-row scratch holding the lane-wise partial sums.
        var T = BufferPtrFloat32.alloc(nelts)
        var Tbuf = Buffer[nelts, DType.float32](T)
        memset_zero(T, nelts)

        # The width parameter is named `_nelts` so it no longer shadows the
        # file-level `nelts` alias used for the scratch size.
        @parameter
        fn dot[_nelts: Int](j: Int):
            T.simd_store[_nelts](
                0, T.simd_load[_nelts](0) + A.load[_nelts](j) * B.load[_nelts](i, j)
            )

        vectorize[nelts, dot](B.cols)
        C[i] = sum[nelts, DType.float32](Tbuf)
        # Release the scratch buffer; it was previously leaked once per row.
        T.free()

    parallelize[calc_row](B.rows)
477
+
478
+
479
fn matmul(inout C: Matrix, A: Matrix, B: Matrix) -> None:
    # Entry point for every matrix-vector product in the model:
    # B (d,n) @ A (n,) -> C (d,)
    # Uses the vectorized kernel; the parallel one is kept as an alternative.
    # matmul_parallelized(C, A, B)
    matmul_vectorized(C, A, B)
483
+
484
+
485
fn transformer(
    token: Int,
    pos: Int,
    config: Config,
    inout state: RunState,
    weights: TransformerWeights,
) -> None:
    # One forward pass for `token` at sequence position `pos`.
    # Writes this position's keys/values into the caches in `state` and leaves
    # the result of the final classifier matmul in state.logits.
    var x = state.x.data
    let dim = config.dim
    let hidden_dim = config.hidden_dim
    let n_heads = config.n_heads
    let seq_len = config.seq_len
    let head_size = dim // n_heads
    let n_ctx = pos + 1  # timesteps attended to, current one included

    # Re-pointed at each weight slab before every matmul; owns no memory.
    var tmpw = Matrix(0, 0)

    # Copy the token embedding into x
    memcpy[DType.float32](x, weights.token_embedding_table.data.offset(token * dim), dim)

    # Pluck out the "pos" row of freq_cis_real and freq_cis_imag
    let rope_row_off = pos * head_size // 2
    let freq_cis_real_row = weights.freq_cis_real.data.offset(rope_row_off)
    let freq_cis_imag_row = weights.freq_cis_imag.data.offset(rope_row_off)

    # Divisor applied to every raw attention score.
    let score_div = math.sqrt[DType.float32, 1](head_size)

    # Forward all the layers
    for l in range(config.n_layers):
        let norm_off = l * dim
        let attn_w_off = l * dim * dim
        let ffn_w_off = l * dim * hidden_dim

        # Attention rmsnorm
        rmsnorm(state.xb.data, x, weights.rms_att_weight.data.offset(norm_off), dim)

        # QKV matmuls for this position
        tmpw.set_buf_ptr(weights.wq.data.offset(attn_w_off), dim, dim)
        matmul(state.q, state.xb, tmpw)

        tmpw.set_buf_ptr(weights.wk.data.offset(attn_w_off), dim, dim)
        matmul(state.k, state.xb, tmpw)

        tmpw.set_buf_ptr(weights.wv.data.offset(attn_w_off), dim, dim)
        matmul(state.v, state.xb, tmpw)

        # Apply RoPE rotation to the q and k vectors for each head
        for h in range(n_heads):
            let q = state.q.data.offset(h * head_size)
            let k = state.k.data.offset(h * head_size)

            # Each consecutive pair (i, i + 1) is rotated by (fcr, fci).
            for i in range(0, head_size, 2):
                let q0 = q.simd_load[1](i)
                let q1 = q.simd_load[1](i + 1)
                let k0 = k.simd_load[1](i)
                let k1 = k.simd_load[1](i + 1)
                let fcr = freq_cis_real_row.simd_load[1](i // 2)
                let fci = freq_cis_imag_row.simd_load[1](i // 2)
                q.simd_store[1](i, q0 * fcr - q1 * fci)
                q.simd_store[1](i + 1, q0 * fci + q1 * fcr)
                k.simd_store[1](i, k0 * fcr - k1 * fci)
                k.simd_store[1](i + 1, k0 * fci + k1 * fcr)

        # Save key,value at this time step (pos) to our kv cache
        let loff = l * seq_len * dim  # kv cache layer offset for convenience
        let cache_pos = loff + pos * dim
        memcpy[DType.float32](state.key_cache.data.offset(cache_pos), state.k.data, dim)
        memcpy[DType.float32](state.value_cache.data.offset(cache_pos), state.v.data, dim)

        # Multihead attention. Iterate over all heads
        for h in range(n_heads):
            let head_off = h * head_size
            # Query vector and attention-score row for this head
            let q = state.q.data.offset(head_off)
            var att = state.att.data.offset(h * seq_len)

            # Score every timestep up to and including the current one
            for t in range(n_ctx):
                let k = state.key_cache.data.offset(loff + t * dim + head_off)
                # Dot product of q and k, divided by sqrt(head_size)
                var score: Float32 = 0.0
                for i in range(head_size):
                    score += q.simd_load[1](i) * k.simd_load[1](i)
                att.simd_store[1](t, score / score_div)

            # Softmax the scores to get attention weights, from 0..pos inclusively
            softmax(att, n_ctx)

            # Weighted sum of the values, store back into xb
            let xb = state.xb.data.offset(head_off)
            memset_zero(xb, head_size)
            for t in range(n_ctx):
                let v = state.value_cache.data.offset(loff + t * dim + head_off)
                let a = att.simd_load[1](t)
                for i in range(head_size):
                    xb.simd_store[1](i, xb.simd_load[1](i) + a * v.simd_load[1](i))

        # Final matrix multiplication to get the output of the attention
        tmpw.set_buf_ptr(weights.wo.data.offset(attn_w_off), dim, dim)
        matmul(state.xb2, state.xb, tmpw)

        # Residual connection back into x
        accum(x, state.xb2.data, dim)

        # FFN rmsnorm
        rmsnorm(state.xb.data, x, weights.rms_ffn_weight.data.offset(norm_off), dim)

        # Calculate self.w1(x) and self.w3(x) for FFN
        tmpw.set_buf_ptr(weights.w1.data.offset(ffn_w_off), hidden_dim, dim)
        matmul(state.hb, state.xb, tmpw)

        tmpw.set_buf_ptr(weights.w3.data.offset(ffn_w_off), hidden_dim, dim)
        matmul(state.hb2, state.xb, tmpw)

        # SiLU (silu(x) = x * sigmoid(x)) followed by the elementwise multiply
        # with w3(x), done in one pass over the hidden buffer
        for i in range(hidden_dim):
            let hbi = state.hb[i]
            let activated: Float32 = hbi * (1.0 / (1.0 + math.exp(-hbi)))
            state.hb[i] = activated * state.hb2[i]

        # Final matrix multiplication to get the output of the FFN
        tmpw.set_buf_ptr(weights.w2.data.offset(ffn_w_off), dim, hidden_dim)
        matmul(state.xb, state.hb, tmpw)

        # Residual connection
        accum(x, state.xb.data, dim)

    # Final rmsnorm
    rmsnorm(x, x, weights.rms_final_weight.data, dim)

    # Classifier into logits
    tmpw.set_buf_ptr(weights.wcls.data, config.vocab_size, dim)
    matmul(state.logits, state.x, tmpw)
627
+
628
+
629
fn argmax(v: Matrix) -> Int:
    # Index of the largest of the first v.cols entries of `v`; on ties the
    # lowest index wins because only a strictly greater value replaces the best.
    var best_idx: Int = 0
    var best_val: Float32 = v[0]
    # Index 0 is already the initial best, so scanning starts at 1.
    for idx in range(1, v.cols):
        let candidate = v[idx]
        if candidate > best_val:
            best_val = candidate
            best_idx = idx
    return best_idx
638
+
639
+
640
fn sample(probabilities: Matrix) -> Int:
    # Draws an index from the distribution in `probabilities` (they must sum
    # to 1): returns the first index whose cumulative probability exceeds a
    # random draw.
    let n = probabilities.cols

    # `rand` writes into a buffer, so draw one float into a temporary, copy it
    # out and free the temporary right away (it used to leak on every call).
    # NOTE(review): assumes rand yields values in [0, 1) - confirm against the
    # `random` module documentation.
    let r = DTypePointer[DType.float32].alloc(1)
    rand[DType.float32](r, 1)
    let threshold = r.simd_load[1](0)
    r.free()

    var cdf: Float32 = 0.0
    for i in range(n):
        cdf += probabilities[i]
        if threshold < cdf:
            return i
    return n - 1  # In case of rounding errors
652
+
653
+
654
fn print_str(s: PointerString):
    # Prints the bytes of `s` one at a time, without a trailing newline,
    # stopping at the first 0 byte.
    var idx: Int = 0
    var code = s[idx].to_int()
    while code != 0:
        print_no_newline(chr(code))
        idx += 1
        code = s[idx].to_int()
660
+
661
+
662
fn time_in_ms() -> Int:
    # Current value of time.now() divided down by 1_000_000; used to benchmark
    # the model speed in milliseconds.
    let ticks = time.now()
    return ticks // 1_000_000
665
+
666
+
667
fn main() raises:
    # Loads the checkpoint and tokenizer, then generates up to `steps` tokens
    # starting from BOS, printing each token as it is produced and the
    # achieved tokens per second at the end.
    print("num hardware threads: ", num_cores(), " SIMD vector width: ", nelts)

    # Run settings
    let checkpoint = "stories15M.bin"
    # let checkpoint = "stories110M.bin"
    let tokenizer = "tokenizer.bin"
    let temperature = 0.0
    var steps = 256
    let prompt = ""
    let rng_seed: Int = time.now()
    random.seed(rng_seed)

    var fbuf: FileBuf = FileBuf()
    var tbuf: FileBuf = FileBuf()
    var config: Config = Config()

    # Checkpoint: header first, then the weights that follow it
    read_file(checkpoint, fbuf)
    print("checkpoint size: ", fbuf.size)
    config_init(config, fbuf)

    # negative vocab size is hacky way of signaling unshared weights. bit yikes.
    let shared_weights = 1 if config.vocab_size > 0 else 0
    if config.vocab_size < 0:
        config.vocab_size = -config.vocab_size

    let weights: TransformerWeights = TransformerWeights(config, shared_weights, fbuf)

    var tok: Tokenizer = Tokenizer(config.vocab_size)

    # Clamp the step count to the model's sequence length
    if steps <= 0 or steps > config.seq_len:
        steps = config.seq_len

    # Read in the tokenizer.bin file
    read_file(tokenizer, tbuf)
    tokenizer_init(tok, tbuf)

    # Create and initialize the application RunState
    var state = RunState(config)

    # Generation state
    var start = 0  # Used to time our code, only initialized after the first iteration
    var next_token = 0  # Will store the next token in the sequence
    # Initialize with token 1 (=BOS), as done in Llama-2 sentencepiece tokenizer
    var token = 1
    var pos = 0  # Position in the sequence

    # Explicitly print the initial BOS token for stylistic symmetry reasons
    print("<s>")

    while pos < steps:
        # Forward the transformer to get logits for the next token
        transformer(token, pos, config, state, weights)

        if temperature == 0.0:
            # Greedy argmax sampling: take the token with the highest probability
            next_token = argmax(state.logits)
        else:
            # Scale the logits by the temperature, turn them into
            # probabilities, then sample from that distribution
            for q in range(config.vocab_size):
                state.logits[q] = state.logits[q] / temperature
            softmax(state.logits.data, config.vocab_size)
            next_token = sample(state.logits)

        # Skip the leading space of the token that directly follows BOS
        var token_str: PointerString = tok.vocab[next_token]
        if token == 1 and token_str[0] == ord(" "):
            token_str = token_str.offset(1)

        print_str(token_str)
        # flush?

        # Advance forward
        token = next_token
        pos += 1

        # Start the clock once the first iteration has completed
        if start == 0:
            start = time_in_ms()

    let end = time_in_ms()
    print("\nachieved tok/s: ", (steps - 1) / (end - start) * 1000)
read/__init__.mojo ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This is a port of Zig's buffered reader.
3
+ See: https://github.com/ziglang/zig/blob/master/lib/std/io/buffered_reader.zig
4
+ The MIT License (Expat)
5
+
6
+ Copyright (c) Lukas Hermann
7
+
8
+ Permission is hereby granted, free of charge, to any person obtaining a copy
9
+ of this software and associated documentation files (the "Software"), to deal
10
+ in the Software without restriction, including without limitation the rights
11
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
+ copies of the Software, and to permit persons to whom the Software is
13
+ furnished to do so, subject to the following conditions:
14
+
15
+ The above copyright notice and this permission notice shall be included in
16
+ all copies or substantial portions of the Software.
17
+
18
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24
+ THE SOFTWARE.
25
+ """
26
+
27
+ from utils.list import Dim
28
+ from math import min
29
+ from math.limit import max_finite
30
+ from memory import memcpy
31
+ from memory.buffer import Buffer
32
+ from memory.unsafe import Pointer, DTypePointer
33
+ from sys.info import sizeof
34
+ from utils.index import Index
35
+ from utils.vector import DynamicVector
36
+ import testing
37
+
38
+
39
+ from .libc.stdio import fopen, fread, fclose, FILE
40
+ from .libc.dirent import readdir, opendir, closedir, DIR, dirent
41
+ from .libc.string import strnlen
42
+
43
+ # Default chunk size in bytes; File.read requests no more than this many
+ # bytes from the underlying stream per call.
+ alias BUF_SIZE = 4096
44
+
45
+ # Type aliases
46
+ # Byte type used for the C strings built by to_char_ptr and handed to libc.
+ alias c_char = UInt8
47
+
48
+
49
fn to_char_ptr(s: String) -> Pointer[c_char]:
    """Copy `s` into a freshly allocated, NUL-terminated byte array.

    Only ASCII-based strings are supported: every character is stored as the
    single value returned by `ord`.

    Args:
        s: The string to copy.

    Returns:
        A pointer to `len(s) + 1` bytes. Nothing in this function frees the
        allocation; ownership passes to the caller.
    """
    let length = len(s)
    let c_str = Pointer[c_char]().alloc(length + 1)
    var idx = 0
    while idx < length:
        c_str.store(idx, ord(s[idx]))
        idx += 1
    # Terminator goes in the extra slot reserved above.
    c_str.store(length, ord("\0"))
    return c_str
56
+
57
+
58
struct File:
    """Owning wrapper around a C `FILE*` opened for reading with `fopen`.

    The struct also owns the two C strings (file name and mode) that were
    passed to `fopen`; all three resources are released in `__del__`.
    """

    var handle: Pointer[FILE]
    var fname: Pointer[c_char]
    var mode: Pointer[c_char]

    fn __init__(inout self, filename: String):
        """Open `filename` with mode "r".

        `handle` is whatever `fopen` returned; use `__bool__` to check that
        it is non-null before reading.

        Args:
            filename: Path of the file to open (ASCII only, see to_char_ptr).
        """
        self.fname = to_char_ptr(filename)
        self.mode = to_char_ptr("r")
        self.handle = fopen(self.fname, self.mode)

    fn __bool__(self) -> Bool:
        """Return True when the underlying handle is non-null."""
        return self.handle.__bool__()

    fn __del__(owned self) raises:
        """Close the stream (if open) and free the owned C strings.

        The strings are released before any error is raised so that a failing
        `fclose` does not leak them.

        Raises:
            Error: If `fclose` reports a non-zero status.
        """
        var status: Int32 = 0
        if self.handle:
            status = fclose(self.handle)
        if self.fname:
            self.fname.free()
        if self.mode:
            self.mode.free()
        if status != 0:
            raise Error("Failed to close file")

    fn __moveinit__(inout self, owned other: Self):
        """Take over `other`'s handle and strings, leaving `other` with null pointers."""
        self.fname = other.fname
        self.mode = other.mode
        self.handle = other.handle
        # Null out the source so its pointers can never be released twice.
        other.handle = Pointer[FILE]()
        other.fname = Pointer[c_char]()
        other.mode = Pointer[c_char]()

    fn do_nothing(self):
        """No-op.

        NOTE(review): presumably called by users to keep the value alive until
        a given point — confirm against callers.
        """
        pass

    fn read[D: Dim](self, buffer: Buffer[D, DType.uint8]) raises -> Int:
        """Read bytes from the stream into `buffer`.

        At most `min(len(buffer), BUF_SIZE)` bytes are requested, so the call
        never writes past the end of `buffer`.

        Args:
            buffer: Destination for the bytes read.

        Returns:
            The item count reported by `fread` (items are one byte each).

        Raises:
            Error: If the file handle is null.
        """
        if not self.handle:
            raise Error("Cannot read: file is not open")
        let count = min(len(buffer), BUF_SIZE)
        return fread(
            buffer.data.as_scalar_pointer(), sizeof[UInt8](), count, self.handle
        ).to_int()
101
+
102
+
103
struct DirEntry:
    """A directory entry: the raw `dirent` pointer plus its name copied into a String."""

    var _pointer: Pointer[dirent]
    var name: String

    fn __init__(inout self, pointer: Pointer[dirent]):
        """Build an entry from `pointer`, copying the entry name byte by byte.

        When `pointer` is null the name is left empty.

        Args:
            pointer: Entry pointer, e.g. as returned by `readdir`.
        """
        self._pointer = pointer
        self.name = String()
        if pointer:
            # The name bytes are read inline from the record, right after the
            # d_ino (UInt64), d_off (UInt64), d_reclen (UInt16) and d_type
            # (UInt8) fields.
            # NOTE(review): this hard-codes one platform's dirent layout —
            # confirm it matches every target this runs on.
            let name_ptr = pointer.bitcast[UInt8]().offset(
                sizeof[UInt64]() * 2 + sizeof[UInt16]() + sizeof[UInt8]()
            )
            let name_len = strnlen(name_ptr)
            for i in range(name_len):
                self.name += chr(name_ptr.load(i).to_int())
118
+
119
+
120
@value
@register_passable("trivial")
struct DirIter:
    """Cursor over a directory stream holding the most recently read entry.

    `__iter__` reads one entry from the stream into `data`; `__next__` hands
    that entry back and `__len__` reports whether one is available.

    NOTE(review): `__next__` does not advance the stream itself, so a caller
    must invoke `__iter__` again to move to the following entry. Prefetching
    inside `__next__` was deliberately not added: POSIX allows a `readdir`
    call to overwrite the entry returned by the previous call, which would
    invalidate the pointer being returned.
    """

    var handle: Pointer[DIR]
    var data: Pointer[dirent]

    fn __iter__(inout self) -> Self:
        """Read the next entry from `handle` into `data` and return the iterator.

        With a null `handle` no read is attempted and `data` is set to null.
        """
        if self.handle:
            self.data = readdir(self.handle)
        else:
            self.data = Pointer[dirent]()
        return self

    fn __next__(self) raises -> Pointer[dirent]:
        """Return the entry currently stored in `data` (may be null)."""
        return self.data

    fn __len__(self) -> Int:
        """Return 1 when both `handle` and `data` are non-null, otherwise 0."""
        if self.handle and self.data:
            return 1
        return 0
136
+
137
+
138
struct Dir:
    """Owning wrapper around a directory stream opened with `opendir`.

    Owns both the stream handle and the C string holding the path.
    """

    var handle: Pointer[DIR]
    var path: Pointer[c_char]

    fn __init__(inout self, path: String):
        """Open the directory at `path`.

        `handle` is whatever `opendir` returned; use `__bool__` to check that
        it is non-null.

        Args:
            path: Directory path (ASCII only, see to_char_ptr).
        """
        self.path = to_char_ptr(path)
        self.handle = opendir(self.path)

    fn __bool__(self) -> Bool:
        """Return True when the directory handle is non-null."""
        return self.handle.__bool__()

    fn __iter__(self) -> DirIter:
        """Return a DirIter over this handle with no entry loaded yet."""
        return DirIter(self.handle, Pointer[dirent]())

    fn __del__(owned self) raises:
        """Close the stream (if it was opened) and free the path string.

        The path is released before any error is raised so that a failing
        `closedir` does not leak it, and `closedir` is never called on a null
        handle.

        Raises:
            Error: If `closedir` reports a non-zero status.
        """
        var status: Int32 = 0
        if self.handle:
            status = closedir(self.handle)
        if self.path:
            self.path.free()
        if status != 0:
            raise Error("failed to close dir")

    fn do_nothing(self):
        """No-op.

        NOTE(review): presumably called by users to keep the value alive until
        a given point — confirm against callers.
        """
        pass
160
+
161
+
162
struct BufReader[BUF_SIZE: Int]:
    """Buffered reader layered over a `File`.

    Bytes are pulled from the file into an internal buffer of `BUF_SIZE` bytes
    and handed out from there; `start`/`end` delimit the unread part of that
    buffer.
    """

    var unbuffered_reader: File
    var data: DTypePointer[DType.uint8]
    var end: Int
    var start: Int

    fn __init__(inout self, owned reader: File):
        """Take ownership of `reader` and allocate an empty internal buffer."""
        self.unbuffered_reader = reader ^
        self.data = DTypePointer[DType.uint8]().alloc(BUF_SIZE)
        self.end = 0
        self.start = 0

    fn __del__(owned self) raises:
        """Release the internal buffer allocated in `__init__`."""
        self.data.free()

    fn read[D: Dim](inout self, dest: Buffer[D, DType.uint8]) raises -> Int:
        """Fill `dest` from the buffered stream.

        Args:
            dest: Destination buffer; the call tries to fill all of it.

        Returns:
            `len(dest)` when the buffer was filled completely, otherwise the
            number of bytes copied before the underlying reader returned 0.
        """
        let wanted = len(dest)
        let buf = Buffer[BUF_SIZE, DType.uint8](self.data)
        var dest_index = 0

        while dest_index < wanted:
            let available = self.end - self.start
            if available == 0:
                # Internal buffer drained: refill it from the file.
                let n = self.unbuffered_reader.read(buf)
                if n == 0:
                    # Nothing more to read from the underlying stream.
                    return dest_index
                self.start = 0
                self.end = n
                continue
            let written = min(wanted - dest_index, available)
            memcpy(dest.data.offset(dest_index), self.data.offset(self.start), written)
            self.start += written
            dest_index += written
        return wanted

    fn do_nothing(self):
        """No-op.

        NOTE(review): presumably called by users to keep the value alive until
        a given point — confirm against callers.
        """
        pass
read/libc/__init__.mojo ADDED
File without changes
read/libc/dirent/__init__.mojo ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from memory.unsafe import Pointer
2
+
3
+
4
+ # Field-less placeholder for the C directory-stream type. opendir, readdir
+ # and closedir below only ever handle it through Pointer[DIR].
+ @value
5
+ @register_passable("trivial")
6
+ struct DIR:
7
+ pass
8
+
9
+
10
+ # Declared shape of the entry record that readdir returns a pointer to.
+ # NOTE(review): d_name is declared here as a pointer, yet DirEntry in
+ # read/__init__.mojo reads the name bytes inline at the offset just past
+ # d_type — confirm which layout is intended before using this field.
+ @value
11
+ @register_passable("trivial")
12
+ struct dirent:
13
+ var d_ino: UInt64
14
+ var d_off: UInt64
15
+ var d_reclen: UInt16
16
+ var d_type: UInt8
17
+ var d_name: Pointer[UInt8]
18
+
19
+
20
+ # ===--------------------------------------------------------------------===#
21
+ # closedir
22
+ # ===-----------------------------------------------------------------------===#
23
+
24
+
25
fn closedir(arg: Pointer[DIR]) -> Int32:
    """Forward `arg` to the C library symbol `closedir` via `external_call`.

    Args:
        arg: Directory-stream pointer, passed through unchanged.

    Returns:
        The Int32 value produced by the foreign call.
    """
    let status = external_call["closedir", Int32, Pointer[DIR]](arg)
    return status
27
+
28
+
29
+ # ===--------------------------------------------------------------------===#
30
+ # opendir
31
+ # ===-----------------------------------------------------------------------===#
32
+
33
+
34
fn opendir(arg: Pointer[UInt8]) -> Pointer[DIR]:
    """Forward `arg` to the C library symbol `opendir` via `external_call`.

    Args:
        arg: Pointer to the path bytes, passed through unchanged.

    Returns:
        The `Pointer[DIR]` produced by the foreign call.
    """
    let stream = external_call["opendir", Pointer[DIR], Pointer[UInt8]](arg)
    return stream
36
+
37
+
38
+ # ===--------------------------------------------------------------------===#
39
+ # readdir
40
+ # ===-----------------------------------------------------------------------===#
41
+
42
+
43
fn readdir(arg: Pointer[DIR]) -> Pointer[dirent]:
    """Forward `arg` to the C library symbol `readdir` via `external_call`.

    Args:
        arg: Directory-stream pointer, passed through unchanged.

    Returns:
        The `Pointer[dirent]` produced by the foreign call.
    """
    let entry = external_call["readdir", Pointer[dirent], Pointer[DIR]](arg)
    return entry
45
+
46
+
47
+ # ===--------------------------------------------------------------------===#
48
+ # fdopendir
49
+ # ===-----------------------------------------------------------------------===#
50
+
51
+
52
fn fdopendir(arg: Int32) -> DIR:
    """Forward `arg` to the C library symbol `fdopendir` via `external_call`.

    The argument type is now spelled out in the `external_call` parameter
    list, matching the other wrappers in this module.

    NOTE(review): POSIX declares `fdopendir` as returning `DIR *`, whereas
    this wrapper is declared to return `DIR` by value. The signature is kept
    as-is for compatibility — confirm and switch to `Pointer[DIR]` if no
    caller depends on the current type.

    Args:
        arg: File descriptor, passed through unchanged.

    Returns:
        The value produced by the foreign call, typed as `DIR`.
    """
    return external_call["fdopendir", DIR, Int32](arg)
54
+
55
+
56
+ #
read/libc/stdio/__init__.mojo ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from memory.unsafe import Pointer
2
+
3
+
4
+ # Field-less placeholder for the C stream type. The wrappers below only ever
+ # handle it through Pointer[FILE].
+ @value
5
+ @register_passable("trivial")
6
+ struct FILE:
7
+ pass
8
+
9
+
10
+ # ===--------------------------------------------------------------------===#
11
+ # clearerr
12
+ # ===-----------------------------------------------------------------------===#
13
+
14
+
15
fn clearerr(arg: Pointer[FILE]) -> UInt8:
    """Forward `arg` to the C library symbol `clearerr` via `external_call`.

    NOTE(review): C declares `clearerr` as returning `void`, so the UInt8
    returned here is presumably not meaningful — confirm before relying on it.

    Args:
        arg: Stream pointer, passed through unchanged.

    Returns:
        The UInt8 value produced by the foreign call.
    """
    let result = external_call["clearerr", UInt8, Pointer[FILE]](arg)
    return result
17
+
18
+
19
+ # ===--------------------------------------------------------------------===#
20
+ # fclose
21
+ # ===-----------------------------------------------------------------------===#
22
+
23
+
24
fn fclose(arg: Pointer[FILE]) -> Int32:
    """Forward `arg` to the C library symbol `fclose` via `external_call`.

    The argument type is now spelled out in the `external_call` parameter
    list, matching every other wrapper in this module.

    Args:
        arg: Stream pointer, passed through unchanged.

    Returns:
        The Int32 value produced by the foreign call.
    """
    return external_call["fclose", Int32, Pointer[FILE]](arg)
26
+
27
+
28
+ # ===--------------------------------------------------------------------===#
29
+ # feof
30
+ # ===-----------------------------------------------------------------------===#
31
+
32
+
33
fn feof(arg: Pointer[FILE]) -> Int32:
    """Forward `arg` to the C library symbol `feof` via `external_call`.

    Args:
        arg: Stream pointer, passed through unchanged.

    Returns:
        The Int32 value produced by the foreign call.
    """
    let result = external_call["feof", Int32, Pointer[FILE]](arg)
    return result
35
+
36
+
37
+ # ===--------------------------------------------------------------------===#
38
+ # ferror
39
+ # ===-----------------------------------------------------------------------===#
40
+
41
+
42
fn ferror(arg: Pointer[FILE]) -> Int32:
    """Forward `arg` to the C library symbol `ferror` via `external_call`.

    Args:
        arg: Stream pointer, passed through unchanged.

    Returns:
        The Int32 value produced by the foreign call.
    """
    let result = external_call["ferror", Int32, Pointer[FILE]](arg)
    return result
44
+
45
+
46
+ # ===--------------------------------------------------------------------===#
47
+ # fflush
48
+ # ===-----------------------------------------------------------------------===#
49
+
50
+
51
fn fflush(arg: Pointer[FILE]) -> Int32:
    """Forward `arg` to the C library symbol `fflush` via `external_call`.

    Args:
        arg: Stream pointer, passed through unchanged.

    Returns:
        The Int32 value produced by the foreign call.
    """
    let result = external_call["fflush", Int32, Pointer[FILE]](arg)
    return result
53
+
54
+
55
+ # ===--------------------------------------------------------------------===#
56
+ # fgetc
57
+ # ===-----------------------------------------------------------------------===#
58
+
59
+
60
fn fgetc(arg: Pointer[FILE]) -> Int32:
    """Forward `arg` to the C library symbol `fgetc` via `external_call`.

    Args:
        arg: Stream pointer, passed through unchanged.

    Returns:
        The Int32 value produced by the foreign call.
    """
    let result = external_call["fgetc", Int32, Pointer[FILE]](arg)
    return result
62
+
63
+
64
+ # ===--------------------------------------------------------------------===#
65
+ # fopen
66
+ # ===-----------------------------------------------------------------------===#
67
+
68
+
69
fn fopen(__filename: Pointer[UInt8], __mode: Pointer[UInt8]) -> Pointer[FILE]:
    """Forward both arguments to the C library symbol `fopen` via `external_call`.

    Args:
        __filename: Pointer to the file-name bytes, passed through unchanged.
        __mode: Pointer to the mode-string bytes, passed through unchanged.

    Returns:
        The `Pointer[FILE]` produced by the foreign call.
    """
    let stream = external_call[
        "fopen", Pointer[FILE], Pointer[UInt8], Pointer[UInt8]
    ](__filename, __mode)
    return stream
73
+
74
+
75
+ # ===--------------------------------------------------------------------===#
76
+ # fread
77
+ # ===-----------------------------------------------------------------------===#
78
+
79
+
80
fn fread(
    __ptr: Pointer[UInt8], __size: UInt64, __nitems: UInt64, __stream: Pointer[FILE]
) -> UInt64:
    """Forward all four arguments to the C library symbol `fread` via `external_call`.

    Args:
        __ptr: Destination pointer, passed through unchanged.
        __size: Item size, passed through unchanged.
        __nitems: Item count, passed through unchanged.
        __stream: Stream pointer, passed through unchanged.

    Returns:
        The UInt64 value produced by the foreign call.
    """
    let items_read = external_call[
        "fread", UInt64, Pointer[UInt8], UInt64, UInt64, Pointer[FILE]
    ](__ptr, __size, __nitems, __stream)
    return items_read
read/libc/string/__init__.mojo ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from memory.unsafe import Pointer
2
+
3
+
4
fn strnlen(pointer: Pointer[UInt8]) -> Int:
    """Return the number of bytes before the first NUL byte at `pointer`.

    C's `strnlen` takes two arguments (the string and a maximum length), but
    this wrapper only has a pointer to pass, which would leave the bound
    indeterminate. The call is therefore routed to the one-argument `strlen`,
    which has exactly the signature used here.

    Args:
        pointer: Pointer to a NUL-terminated byte string.

    Returns:
        The length of the string, not counting the terminator.
    """
    return external_call["strlen", Int, Pointer[UInt8]](pointer)
tokenizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74f899535f8f99cdeac697474b697f3fa40467b8376dab7b6c4d8c753f28ae9f
3
+ size 432717