cakiki committed on
Commit
326fe35
1 Parent(s): a4866d4

Add training stub

Browse files
.dockerignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ saved_model
2
+ data
3
+ *.bin
4
+ *.h5
5
+ .git
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ data
Dockerfile CHANGED
@@ -1,3 +1,3 @@
1
  FROM tensorflow/tensorflow:2.6.0rc2-gpu-jupyter
2
 
3
- RUN pip install --use-feature=2020-resolver tensorflow_probability tensorflow-addons tensorflow_hub transformers pandas "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html flax nltk spacy seaborn rich torch
 
1
  FROM tensorflow/tensorflow:2.6.0rc2-gpu-jupyter
2
 
3
+ RUN pip install --use-feature=2020-resolver tensorflow_probability tensorflow-addons tensorflow_hub transformers datasets pandas "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html flax nltk spacy seaborn rich torch
Makefile CHANGED
@@ -1,4 +1,4 @@
1
- VERSION := 0.0.1
2
  NAME := gpt-code
3
  REPO := prophetikai
4
 
 
1
+ VERSION := 0.0.2
2
  NAME := gpt-code
3
  REPO := prophetikai
4
 
clone_sentdex_model_tokenizer.py CHANGED
File without changes
config.json CHANGED
@@ -32,7 +32,7 @@
32
  }
33
  },
34
  "torch_dtype": "float32",
35
- "transformers_version": "4.9.1",
36
  "use_cache": true,
37
  "vocab_size": 52000
38
  }
 
32
  }
33
  },
34
  "torch_dtype": "float32",
35
+ "transformers_version": "4.9.2",
36
  "use_cache": true,
37
  "vocab_size": 52000
38
  }
prophetikai/gpt-code/config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "_name_or_path": "Sentdex/GPyT",
3
- "activation_function": "gelu_new",
4
- "architectures": [
5
- "GPT2LMHeadModel"
6
- ],
7
- "attn_pdrop": 0.1,
8
- "bos_token_id": 0,
9
- "embd_pdrop": 0.1,
10
- "eos_token_id": 2,
11
- "gradient_checkpointing": false,
12
- "initializer_range": 0.02,
13
- "layer_norm_epsilon": 1e-05,
14
- "model_type": "gpt2",
15
- "n_ctx": 1024,
16
- "n_embd": 768,
17
- "n_head": 12,
18
- "n_inner": null,
19
- "n_layer": 12,
20
- "n_positions": 1024,
21
- "resid_pdrop": 0.1,
22
- "scale_attn_weights": true,
23
- "summary_activation": null,
24
- "summary_first_dropout": 0.1,
25
- "summary_proj_to_labels": true,
26
- "summary_type": "cls_index",
27
- "summary_use_proj": true,
28
- "task_specific_params": {
29
- "text-generation": {
30
- "do_sample": false,
31
- "max_length": 50
32
- }
33
- },
34
- "transformers_version": "4.9.1",
35
- "use_cache": true,
36
- "vocab_size": 52000
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
saved_model/sentdex/keras_metadata.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:129bbccc8aafa2537415e4b8c5a4aeb1e0f44b3e8e1cec7b74b6c6f9949a9d00
3
  size 67119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c122d854316f313f362e8236dc36642fde96756a8a492c806f70264afa96efa
3
  size 67119
saved_model/sentdex/saved_model.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50772dffc2f6ec1132bd2fd2a5b7c69957a89eee2a5a1f3bfaa2482e10484021
3
- size 5079982
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55d97e20adc190a8ba38e59d0be3db2ffd999b46cccf36d76438b9b172cd7ef1
3
+ size 5075538
saved_model/sentdex/variables/variables.data-00000-of-00001 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a01a55505ffc1bbeba1b8390a27893b44ef69c8ee8998efaa973d762d1c068ab
3
- size 503191134
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:795c48f76144639181568cde0f584330559792342d552a69fa17d293687f7c58
3
+ size 503189630
saved_model/sentdex/variables/variables.index CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81a3da439dbad26f51738172b8f69c9b6dfa87f37afef4bb5537fa840e7f284d
3
- size 8900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63b43223ff4ea0dc60d5f447600078a2ce54088c634b7e117504cc4ca765b9d0
3
+ size 8801
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0f89327e2d2584dc10b055a992cd31fe6cb342175df9d5d46e063eb1d9dfd1d
3
  size 503289416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8765e985e5ae7e84747e2368fc4de8bcc2db910401ab90caff21d0e4fcbbea49
3
  size 503289416
train.py CHANGED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ from datasets import load_dataset
3
+
4
+ _DATA_DIR = 'data'
5
+ splits = {
6
+ 'train': [f'{_DATA_DIR}/train/python_train_{i}.jsonl' for i in range(14)],
7
+ 'validation': [f'{_DATA_DIR}/valid/python_valid_{i}.jsonl' for i in range(0)],
8
+ 'test': [f'{_DATA_DIR}/test/python_test_{i}.jsonl' for i in range(0)]
9
+ }
10
+
11
+ dataset = load_dataset('json', data_files=splits)
12
+
13
+ if __name__ == '__main__':
14
+ print(dataset)