first
- __pycache__/partitions.cpython-38.pyc +0 -0
- added_tokens.json +1 -0
- gpt-neo-1.3B/config.json → config.json +0 -0
- gpt-neo-1.3B/flax_model.msgpack → flax_model.msgpack +0 -0
- run.sh +19 -0
- setup_devices.py +0 -18
- special_tokens_map.json +1 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
- vocab.json +0 -0
__pycache__/partitions.cpython-38.pyc
CHANGED
Binary files a/__pycache__/partitions.cpython-38.pyc and b/__pycache__/partitions.cpython-38.pyc differ
added_tokens.json
ADDED
@@ -0,0 +1 @@
+{"<|endoftext|>": 50265}
gpt-neo-1.3B/config.json → config.json
RENAMED
File without changes
gpt-neo-1.3B/flax_model.msgpack → flax_model.msgpack
RENAMED
File without changes
run.sh
ADDED
@@ -0,0 +1,19 @@
+python run_clm_mp.py \
+    --model_name_or_path /mnt/disks/flaxdisk/norwegian-gptneo-red/ \
+    --tokenizer_name /mnt/disks/flaxdisk/norwegian-gptneo-red/ \
+    --train_file /mnt/disks/flaxdisk/corpus/social_train.json \
+    --validation_file /mnt/disks/flaxdisk/corpus/social_validation.json \
+    --do_train \
+    --do_eval \
+    --block_size 1024 \
+    --num_train_epochs 10 \
+    --learning_rate 4e-6 \
+    --per_device_train_batch_size 3 \
+    --per_device_eval_batch_size 3 \
+    --overwrite_output_dir \
+    --output_dir /mnt/disks/flaxdisk/norwegian-gptneo-red \
+    --cache_dir /mnt/disks/flaxdisk/cache/ \
+    --dtype bfloat16 \
+    --logging_steps 97 \
+    --eval_steps 96 \
+    --push_to_hub
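Note: run_clm_mp.py is the model-parallel causal language modeling script from the transformers Flax/JAX examples. The invocation above appears to continue training the resized GPT-Neo checkpoint on the Norwegian corpus in bfloat16, evaluating every 96 steps and pushing the resulting model to the Hub.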
setup_devices.py
DELETED
@@ -1,18 +0,0 @@
-import jax
-import jax.numpy as jnp
-from transformers import FlaxGPTNeoForCausalLM, GPTNeoConfig
-model = FlaxGPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
-
-emb = jnp.zeros((50264, model.config.hidden_size))
-# update the first 50257 weights using pre-trained weights
-emb = jax.ops.index_update(emb, jax.ops.index[:50257, :], model.params["transformer"]["wte"]["embedding"])
-params = model.params
-params["transformer"]["wte"]["embedding"] = emb
-
-# initialize a random model with the right vocab_size
-config = GPTNeoConfig.from_pretrained("EleutherAI/gpt-neo-1.3B", vocab_size=50264)
-model = FlaxGPTNeoForCausalLM(config)
-
-# assign the pre-trained weights and save the model.
-model.params = params
-model.save_pretrained("gpt-neo-1.3B")
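Note: jax.ops.index_update was deprecated and later removed from JAX, so the deleted script only runs on older releases. A minimal sketch of the same embedding-copy step using the current indexed-update API (everything else as in the script above):

    import jax.numpy as jnp
    from transformers import FlaxGPTNeoForCausalLM

    model = FlaxGPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
    emb = jnp.zeros((50264, model.config.hidden_size))
    # Copy the 50257 pre-trained rows into the enlarged matrix;
    # .at[...].set(...) returns a new array since JAX arrays are immutable.
    emb = emb.at[:50257, :].set(model.params["transformer"]["wte"]["embedding"])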
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
+{"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
+{"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "special_tokens_map_file": null, "name_or_path": "norwegian-gpt2", "tokenizer_class": "GPT2Tokenizer"}
vocab.json
ADDED
The diff for this file is too large to render.
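Note: with config.json, flax_model.msgpack, and the tokenizer files in place, the checkpoint can be loaded through the standard transformers API. A minimal sketch (the repository id below is a placeholder, not taken from this commit):

    from transformers import AutoTokenizer, FlaxGPTNeoForCausalLM

    # Hypothetical repo id; substitute the actual Hub repository.
    repo_id = "user/norwegian-gptneo-red"
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = FlaxGPTNeoForCausalLM.from_pretrained(repo_id)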