mcysqrd
/

mojo-coder-1B

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

mojo-coder-1B / README.md

mcysqrd's picture

Update README.md

f6361c3 7 months ago

|

history blame contribute delete

No virus

2.82 kB

	---
	license: apache-2.0
	datasets:
	- mcysqrd/mojo_code
	---

	FIM training over deepseek-coder-1.3B using a mojo-code dataset.
	This is an alpha version. It is trained only for FIM co-pilot style usage.
	later versions should have Q&A added as well as better performance. please leave your comments to help improve it.
	the recipe for this was based on this template from https://huggingface.co/blog/personal-copilot

	```
	tokenizer = AutoTokenizer.from_pretrained(merged_model_path,trust_remote_code=True,use_fast=True)
	model = AutoModelForCausalLM.from_pretrained(
	merged_model_path,
	device_map={"": 0},
	use_cache=True,
	trust_remote_code=True,
	attn_implementation="flash_attention_2",
	torch_dtype=torch.bfloat16
	)

	input_text = """<｜fim▁begin｜>
	from algorithm import parallelize, vectorize
	from benchmark import Benchmark
	from complex import ComplexSIMD, ComplexFloat64
	from math import iota
	from os import env
	from python import Python
	from python.object import PythonObject
	from runtime.llcl import num_cores, Runtime
	from tensor import Tensor
	from utils.index import Index


	alias float_type = DType.float64
	alias simd_width = simdwidthof[float_type]()

	alias width = 960
	alias height = 960
	alias MAX_ITERS = 200

	alias min_x = -2.0
	alias max_x = 0.6
	alias min_y = -1.5
	alias max_y = 1.5

	fn mandelbrot_kernel_SIMD[
	simd_width: Int
	](c: ComplexSIMD[float_type, simd_width]) -> SIMD[float_type, simd_width]:
	let cx = c.re
	let cy = c.im
	var x = SIMD[float_type, simd_width](0)
	var y = SIMD[float_type, simd_width](0)
	var y2 = SIMD[float_type, simd_width](0)
	var iters = SIMD[float_type, simd_width](0)

	var t: SIMD[DType.bool, simd_width] = True
	for i in range(MAX_ITERS):
	if not t.reduce_or():
	break
	y2 = y*y
	y = x.fma(y + y, cy)
	t = x.fma(x, y2) <= 4
	x = x.fma(x, cx - y2)
	iters = t.select(iters + 1, iters)
	return iters

	fn compare():
	let t = Tensor[float_type](height, width)

	@parameter
	fn worker(row: Int):
	let scale_x = (max_x - min_x) / width
	let scale_y = (max_y - min_y) / height
	<｜fim▁hole｜>
	fn main():
	compare()
	<｜fim▁end｜>"""

	inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
	outputs = model.generate(**inputs, max_length=547+200)
	print(tokenizer.decode(outputs[0], skip_special_tokens=True)[len(input_text):])

	def stream(user_prompt):
	runtimeFlag = "cuda:0"
	inputs = tokenizer([user_prompt], return_tensors="pt").to(runtimeFlag)
	streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
	_ = model.generate(**inputs, streamer=streamer, max_new_tokens=200)

	stream(input_text)
	```

	also try to use an inference endpoint and use a VS-Code extension