---
license: apache-2.0
datasets:
- mcysqrd/mojo_code
---

FIM (fill-in-the-middle) training of deepseek-coder-1.3B on a Mojo code dataset. This is an alpha version, trained only for FIM co-pilot style usage. Later versions should add Q&A support as well as better performance. Please leave your comments to help improve it.

The recipe was based on the template from https://huggingface.co/blog/personal-copilot

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

# merged_model_path should point to the merged model weights (local path or Hub id).
tokenizer = AutoTokenizer.from_pretrained(merged_model_path, trust_remote_code=True, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    merged_model_path,
    device_map={"": 0},
    use_cache=True,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
)

# FIM prompt: the model generates the code that belongs at <|fim▁hole|>.
input_text = """<|fim▁begin|>
from algorithm import parallelize, vectorize
from benchmark import Benchmark
from complex import ComplexSIMD, ComplexFloat64
from math import iota
from os import env
from python import Python
from python.object import PythonObject
from runtime.llcl import num_cores, Runtime
from tensor import Tensor
from utils.index import Index

alias float_type = DType.float64
alias simd_width = simdwidthof[float_type]()

alias width = 960
alias height = 960
alias MAX_ITERS = 200

alias min_x = -2.0
alias max_x = 0.6
alias min_y = -1.5
alias max_y = 1.5


fn mandelbrot_kernel_SIMD[
    simd_width: Int
](c: ComplexSIMD[float_type, simd_width]) -> SIMD[float_type, simd_width]:
    let cx = c.re
    let cy = c.im
    var x = SIMD[float_type, simd_width](0)
    var y = SIMD[float_type, simd_width](0)
    var y2 = SIMD[float_type, simd_width](0)
    var iters = SIMD[float_type, simd_width](0)

    var t: SIMD[DType.bool, simd_width] = True
    for i in range(MAX_ITERS):
        if not t.reduce_or():
            break
        y2 = y * y
        y = x.fma(y + y, cy)
        t = x.fma(x, y2) <= 4
        x = x.fma(x, cx - y2)
        iters = t.select(iters + 1, iters)
    return iters


fn compare():
    let t = Tensor[float_type](height, width)

    @parameter
    fn worker(row: Int):
        let scale_x = (max_x - min_x) / width
        let scale_y = (max_y - min_y) / height
<|fim▁hole|>

fn main():
    compare()
<|fim▁end|>"""

inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
# The prompt is ~547 tokens; allow up to 200 generated tokens on top of it.
outputs = model.generate(**inputs, max_length=547 + 200)
# Decode only the newly generated tokens (the text that fills <|fim▁hole|>).
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))


# Stream the completion token by token instead of waiting for the full output.
def stream(user_prompt):
    device = "cuda:0"
    inputs = tokenizer([user_prompt], return_tensors="pt").to(device)
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    _ = model.generate(**inputs, streamer=streamer, max_new_tokens=200)


stream(input_text)
```

You can also deploy the model to an inference endpoint and use it from a VS Code extension.
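As a rough sketch of the inference-endpoint route (the endpoint URL, token, and Mojo snippet below are placeholder assumptions, not part of this repo), a text-generation request using the same FIM prompt format could look like this:

```python
import requests

# Placeholders: substitute your own Inference Endpoint URL and Hugging Face token.
ENDPOINT_URL = "https://YOUR-ENDPOINT.endpoints.huggingface.cloud"
HF_TOKEN = "hf_your_token_here"

# Build a FIM prompt with DeepSeek-Coder's special tokens: the code before the
# cursor, the hole marker, then the code after the cursor.
prefix = "fn add(a: Int, b: Int) -> Int:\n"
suffix = "\n\nfn main():\n    print(add(1, 2))\n"
prompt = f"<|fim▁begin|>{prefix}<|fim▁hole|>{suffix}<|fim▁end|>"

response = requests.post(
    ENDPOINT_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
    json={
        "inputs": prompt,
        "parameters": {"max_new_tokens": 200, "return_full_text": False},
    },
)
print(response.json())
```

A VS Code co-pilot extension that supports custom endpoints can then be pointed at the same URL.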