reeducator committed on
Commit
af985e0
1 Parent(s): d5208ec

Add model files and README

Browse files
README.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - Squish42/bluemoon-fandom-1-1-rp-cleaned
4
+ language:
5
+ - en
6
+ ---
7
+ ## General
8
+ Bluemoon roleplay finetune of LLaMA 33B (2 roleplayers only). This release also tests a longer context size of 4k tokens, achieved with ALiBi.
9
+
10
+ ## Models
11
+ *GGML 5-bit (q5_0) for llama.cpp:*<br/>
12
+
13
+ 1. ggml-bluemoonrp-30b-4k-epoch6-q5_0.bin
14
+
15
+ *GPTQ 4-bit CUDA:*<br/>
16
+
17
+ 1. bluemoonrp-30b-4k-epoch6-4bit-128g.safetensors
18
+
19
+ ## Remarks
20
+ This model has been trained using the following prompt (Vicuna 1.1 format):
21
+ ```
22
+ A transcript of a roleplay between two players, LEAD and ASSOCIATE. LEAD sets up a scenario and the characters, from which ASSOCIATE then assumes a character role and continues the story for that role in response to description given by LEAD. The story and characters are developed by exchange of detailed event descriptions and character dialogs, successively given by both LEAD and ASSOCIATE.
23
+ LEAD: [role1 message]
24
+ ASSOCIATE: [role2 message]</s>
25
+ ```
config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "bos_token_id": 1,
6
+ "eos_token_id": 2,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 6656,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 17920,
11
+ "max_position_embeddings": 2048,
12
+ "max_seq_len": 4096,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 52,
15
+ "num_hidden_layers": 60,
16
+ "pad_token_id": 0,
17
+ "rms_norm_eps": 1e-06,
18
+ "tie_word_embeddings": false,
19
+ "torch_dtype": "bfloat16",
20
+ "transformers_version": "4.28.0.dev0",
21
+ "use_cache": true,
22
+ "vocab_size": 32000
23
+ }
ggml-bluemoonrp-30b-4k-epoch6-q5_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b7ade7fb1abba1a478ba3fdc19526b79d87d11accf41b6d59caf966cd8f3718
3
+ size 22366783872
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<unk>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<s>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "clean_up_tokenization_spaces": false,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "</s>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "model_max_length": 4096,
22
+ "pad_token": null,
23
+ "padding_side": "right",
24
+ "sp_model_kwargs": {},
25
+ "special_tokens_map_file": "special_tokens_map.json",
26
+ "tokenizer_class": "LlamaTokenizer",
27
+ "unk_token": {
28
+ "__type": "AddedToken",
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false
34
+ }
35
+ }