mcysqrd committed on
Commit
2cde956
1 Parent(s): 055efc6

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +84 -0
README.md CHANGED
@@ -1,3 +1,87 @@
1
  ---
2
  license: apache-2.0
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
+ datasets:
4
+ - mcysqrd/mojo_code
5
  ---
6
+
7
+ ```
8
+ tokenizer = AutoTokenizer.from_pretrained(merged_model_path,trust_remote_code=True,use_fast=True)
9
+ model = AutoModelForCausalLM.from_pretrained(
10
+ merged_model_path,
11
+ device_map={"": 0},
12
+ use_cache=True,
13
+ trust_remote_code=True,
14
+ attn_implementation="flash_attention_2",
15
+ torch_dtype=torch.bfloat16
16
+ )
17
+
18
+ input_text = """<|fim▁begin|>
19
+ from algorithm import parallelize, vectorize
20
+ from benchmark import Benchmark
21
+ from complex import ComplexSIMD, ComplexFloat64
22
+ from math import iota
23
+ from os import env
24
+ from python import Python
25
+ from python.object import PythonObject
26
+ from runtime.llcl import num_cores, Runtime
27
+ from tensor import Tensor
28
+ from utils.index import Index
29
+
30
+
31
+ alias float_type = DType.float64
32
+ alias simd_width = simdwidthof[float_type]()
33
+
34
+ alias width = 960
35
+ alias height = 960
36
+ alias MAX_ITERS = 200
37
+
38
+ alias min_x = -2.0
39
+ alias max_x = 0.6
40
+ alias min_y = -1.5
41
+ alias max_y = 1.5
42
+
43
+ fn mandelbrot_kernel_SIMD[
44
+ simd_width: Int
45
+ ](c: ComplexSIMD[float_type, simd_width]) -> SIMD[float_type, simd_width]:
46
+ let cx = c.re
47
+ let cy = c.im
48
+ var x = SIMD[float_type, simd_width](0)
49
+ var y = SIMD[float_type, simd_width](0)
50
+ var y2 = SIMD[float_type, simd_width](0)
51
+ var iters = SIMD[float_type, simd_width](0)
52
+
53
+ var t: SIMD[DType.bool, simd_width] = True
54
+ for i in range(MAX_ITERS):
55
+ if not t.reduce_or():
56
+ break
57
+ y2 = y*y
58
+ y = x.fma(y + y, cy)
59
+ t = x.fma(x, y2) <= 4
60
+ x = x.fma(x, cx - y2)
61
+ iters = t.select(iters + 1, iters)
62
+ return iters
63
+
64
+ fn compare():
65
+ let t = Tensor[float_type](height, width)
66
+
67
+ @parameter
68
+ fn worker(row: Int):
69
+ let scale_x = (max_x - min_x) / width
70
+ let scale_y = (max_y - min_y) / height
71
+ <|fim▁hole|>
72
+ fn main():
73
+ compare()
74
+ <|fim▁end|>"""
75
+
76
+ inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
77
+ outputs = model.generate(**inputs, max_length=547+200)
78
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True)[len(input_text):])
79
+
80
+ def stream(user_prompt):
81
+ runtimeFlag = "cuda:0"
82
+ inputs = tokenizer([user_prompt], return_tensors="pt").to(runtimeFlag)
83
+ streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
84
+ _ = model.generate(**inputs, streamer=streamer, max_new_tokens=200)
85
+
86
+ stream(input_text)
87
+ ```