open_llama_3b_ggml / convert.py.diff
Commit a8f706c: Update to 1000T token final version
--- a/convert.py 2023-05-30 20:48:07.687486627 +0300
+++ b/convert.py 2023-05-30 20:47:55.854142065 +0300
@@ -143,12 +143,22 @@
     def guessed(model: 'LazyModel', file_type: GGMLFileType) -> 'Params':
         n_vocab, n_embd = model["tok_embeddings.weight"].shape
+        n_mult=256
+        n_head=n_embd // 128
+        n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
+
+        # TODO: hack for open_llama_3b
+        if n_embd == 3200:
+            n_mult = 216
+            n_head = 32
+            n_layer = 26
+
         return Params(
             n_vocab=n_vocab,
             n_embd=n_embd,
-            n_mult=256,
-            n_head=n_embd // 128,
-            n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model),
+            n_mult=n_mult,
+            n_head=n_head,
+            n_layer=n_layer,
             file_type=file_type,
         )
@@ -597,7 +607,9 @@
out["norm.weight"] = model["model.norm.weight"]
out["output.weight"] = model["lm_head.weight"]
- n_head = model["model.layers.0.self_attn.q_proj.weight"].shape[1] // 128
+ # TODO: hack for open_llama_3b
+ n_embd = model["model.layers.0.self_attn.q_proj.weight"].shape[1]
+ n_head = 32 if n_embd == 3200 else n_embd // 128
for i in itertools.count():
if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
break
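
Note on why the hard-coded values are needed: OpenLLaMA 3B uses hidden_size=3200, which breaks convert.py's usual guesses. Dividing by a 128-wide head gives 25 heads instead of the model's actual 32 heads of dimension 100, and rounding the feed-forward width to a multiple of 256 does not reproduce the model's intermediate_size of 8640, while a multiple of 216 does. The sketch below is a minimal sanity check of the patched values, not part of the patch itself; it assumes the published config of openlm-research/open_llama_3b (hidden_size=3200, intermediate_size=8640, num_attention_heads=32, num_hidden_layers=26) and the feed-forward rounding formula llama.cpp used at the time.

# Minimal sanity check for the hard-coded open_llama_3b values (not part of the patch).
# Assumed config for openlm-research/open_llama_3b: hidden_size=3200,
# intermediate_size=8640, num_attention_heads=32, num_hidden_layers=26.

def rounded_ff(n_embd: int, n_mult: int) -> int:
    # Feed-forward width rounding as llama.cpp computed it at the time (assumption):
    # n_ff = ((2 * (4 * n_embd) // 3 + n_mult - 1) // n_mult) * n_mult
    return ((2 * (4 * n_embd) // 3 + n_mult - 1) // n_mult) * n_mult

n_embd = 3200

# The generic guesses fail for the 3B model:
assert n_embd // 128 == 25              # guessed n_head; the model actually has 32 heads of dim 100
assert rounded_ff(n_embd, 256) == 8704  # default n_mult=256 misses the real intermediate_size

# The values hard-coded in the patch reproduce the real configuration:
assert rounded_ff(n_embd, 216) == 8640  # n_mult=216 recovers intermediate_size=8640
assert n_embd // 32 == 100              # 32 heads of dimension 100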