Julian von der Goltz committed on
Commit
0295e43
1 Parent(s): e583d3c

Train for 3 epochs

.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint*
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
+ # Default ignored files
+ /shelf/
+ /workspace.xml
+ # Editor-based HTTP Client requests
+ /httpRequests/
+ # Datasource local storage ignored files
+ /dataSources/
+ /dataSources.local.xml
.idea/Mistral-7B-dbnl-v0.1.iml ADDED
@@ -0,0 +1,12 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <module type="PYTHON_MODULE" version="4">
+ <component name="NewModuleRootManager">
+ <content url="file://$MODULE_DIR$" />
+ <orderEntry type="inheritedJdk" />
+ <orderEntry type="sourceFolder" forTests="false" />
+ </component>
+ <component name="PyDocumentationSettings">
+ <option name="format" value="PLAIN" />
+ <option name="myDocStringFormat" value="Plain" />
+ </component>
+ </module>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
+ <component name="InspectionProjectProfileManager">
+ <settings>
+ <option name="USE_PROJECT_PROFILE" value="false" />
+ <version value="1.0" />
+ </settings>
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+ <component name="Black">
+ <option name="sdkName" value="Python 3.10" />
+ </component>
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+ <component name="ProjectModuleManager">
+ <modules>
+ <module fileurl="file://$PROJECT_DIR$/.idea/Mistral-7B-dbnl-v0.1.iml" filepath="$PROJECT_DIR$/.idea/Mistral-7B-dbnl-v0.1.iml" />
+ </modules>
+ </component>
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+ <component name="VcsDirectoryMappings">
+ <mapping directory="" vcs="Git" />
+ </component>
+ </project>
README.md ADDED
@@ -0,0 +1,86 @@
+ ---
+ license: cc0-1.0
+ library_name: peft
+ tags:
+ - llama-factory
+ - lora
+ - generated_from_trainer
+ base_model: mistralai/Mistral-7B-v0.1
+ model-index:
+ - name: Mistral-7B-dbnl-v0.1
+ results: []
+ datasets:
+ - jvdgoltz/dbnl.org-dutch-public-domain
+ language:
+ - nl
+ pipeline_tag: text-generation
+ ---
+
+ # Mistral-7B-dbnl-v0.1
+
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the DBNL Public Domain dataset, featuring Dutch literary texts in the public domain, with a focus on historical texts that are at least 140 years old.
+
+ ## Model description
+
+ Mistral-7B-dbnl-v0.1 is designed to generate and understand Dutch literature, trained on a wide array of historical Dutch texts. The model uses the LoRA (Low-Rank Adaptation) technique for parameter-efficient fine-tuning, keeping the base weights frozen while training low-rank adapter matrices.
+
+ ## Intended uses & limitations
+
+ I mostly created this for fun, cultural learning, and sharing with others.
+
+ This model can be used by researchers, historians, and natural language processing practitioners interested in Dutch literature, historical text analysis, and language modeling, for tasks such as text generation.
+
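+ A minimal inference sketch with Transformers and PEFT; the adapter repo id `jvdgoltz/Mistral-7B-dbnl-v0.1` and the Dutch prompt are illustrative assumptions:
+
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from peft import PeftModel
+
+ # Load the frozen base model, then attach the LoRA adapter on top.
+ base = AutoModelForCausalLM.from_pretrained(
+     "mistralai/Mistral-7B-v0.1", device_map="auto"
+ )
+ model = PeftModel.from_pretrained(base, "jvdgoltz/Mistral-7B-dbnl-v0.1")  # assumed repo id
+ tokenizer = AutoTokenizer.from_pretrained("jvdgoltz/Mistral-7B-dbnl-v0.1")
+
+ # Sample a continuation in the style of historical Dutch prose.
+ inputs = tokenizer("Het was een donkere winternacht,", return_tensors="pt").to(model.device)
+ outputs = model.generate(**inputs, max_new_tokens=100, do_sample=True, temperature=0.8)
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+ ```
+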
+ ### Limitations
+ - The model is trained on historical texts, which may contain biases and outdated language that do not reflect current norms or values.
+ - The model's performance and relevance may be limited to the context of Dutch literature and historical texts.
+
+ ## Training and evaluation data
+
+ The model was trained on the DBNL Public Domain dataset, which includes a variety of texts such as books, poems, songs, and other documents, providing a rich source of linguistic and cultural heritage.
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training (a `TrainingArguments` sketch follows the list):
+ - learning_rate: 5e-05
+ - train_batch_size: 1
+ - eval_batch_size: 8
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 2
+ - gradient_accumulation_steps: 8
+ - total_train_batch_size: 16
+ - total_eval_batch_size: 16
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_steps: 2000
+ - num_epochs: 3.0
+
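+ The same settings expressed as a Hugging Face `TrainingArguments` sketch; this is a reconstruction, not the original launch script, and the output directory name is illustrative:
+
+ ```python
+ from transformers import TrainingArguments
+
+ args = TrainingArguments(
+     output_dir="Mistral-7B-dbnl-v0.1",  # illustrative
+     learning_rate=5e-5,
+     per_device_train_batch_size=1,      # 1 x 2 GPUs x 8 accumulation steps = 16 effective
+     per_device_eval_batch_size=8,
+     gradient_accumulation_steps=8,
+     lr_scheduler_type="cosine",
+     warmup_steps=2000,
+     num_train_epochs=3.0,
+     seed=42,
+ )
+ ```
+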
+ ### Adapter configuration
+
+ The model uses LoRA with the following configuration:
+ - lora_alpha: 2048
+ - r: 1024
+ - lora_dropout: 0.0
+ - inference_mode: true
+ - init_lora_weights: true
+ - peft_type: "LORA"
+ - target_modules: ["q_proj", "v_proj", "up_proj", "o_proj", "k_proj", "gate_proj"]
+ - task_type: "CAUSAL_LM"
+
+ This configuration adapts the pre-trained attention and MLP projection layers specifically for causal language modeling while keeping the base weights frozen.
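+
+ The equivalent `peft.LoraConfig`, sketched from `adapter_config.json` in this repository:
+
+ ```python
+ from peft import LoraConfig, TaskType
+
+ lora_config = LoraConfig(
+     r=1024,            # high rank; the scaling factor lora_alpha / r = 2
+     lora_alpha=2048,
+     lora_dropout=0.0,
+     target_modules=["q_proj", "v_proj", "up_proj", "o_proj", "k_proj", "gate_proj"],
+     task_type=TaskType.CAUSAL_LM,
+ )
+ ```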
+
+ ### Training results
+
+ ![Training loss](./training_loss.png)
+
+ ### Framework versions
+
+ - PEFT 0.7.1
+ - Transformers 4.37.1
+ - Pytorch 2.1.1+cu121
+ - Datasets 2.16.1
+ - Tokenizers 0.15.1
+
+ The model is an example of applying modern, parameter-efficient NLP techniques to historical texts, and offers a resource for exploring Dutch literature and linguistics.
adapter_config.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 2048,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 1024,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj",
+ "up_proj",
+ "o_proj",
+ "k_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
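This adapter configuration can be inspected directly with PEFT; a small sketch, assuming this repository's id is `jvdgoltz/Mistral-7B-dbnl-v0.1`:

```python
from peft import PeftConfig

# Downloads adapter_config.json and parses it into a LoraConfig.
config = PeftConfig.from_pretrained("jvdgoltz/Mistral-7B-dbnl-v0.1")  # assumed repo id
print(config.peft_type, config.r, config.lora_alpha, config.target_modules)
```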
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2473967aefe07d3899a45fe4ed0252610d27fc499d6f56e7b47f3179df439ea0
+ size 4160803368
all_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "epoch": 3.0,
+ "train_loss": 1.664952250687868,
+ "train_runtime": 336340.7542,
+ "train_samples_per_second": 1.55,
+ "train_steps_per_second": 0.097
+ }
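For scale, these figures imply roughly 93 hours of training; a back-of-the-envelope check in Python (arithmetic on the reported numbers only, no new measurements):

```python
runtime_s = 336340.7542
print(runtime_s / 3600)    # ~93.4 hours of training
print(0.097 * runtime_s)   # ~32,625 optimizer steps
print(1.55 * runtime_s)    # ~521,328 training samples seen over 3 epochs
```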
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "</s>",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "split_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+ }
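Note that no dedicated padding token is defined: `</s>` (the EOS token) doubles as the pad token, with right-side padding. A minimal loading sketch, assuming this repository's id is `jvdgoltz/Mistral-7B-dbnl-v0.1`:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("jvdgoltz/Mistral-7B-dbnl-v0.1")  # assumed repo id
print(tokenizer.pad_token, tokenizer.eos_token)  # both "</s>"
print(tokenizer.padding_side)                    # "right"
```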
train_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "epoch": 3.0,
+ "train_loss": 1.664952250687868,
+ "train_runtime": 336340.7542,
+ "train_samples_per_second": 1.55,
+ "train_steps_per_second": 0.097
+ }
trainer_log.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3c37bf64d34962f45284b26b58825387a8865250d89b678f2b00af67c42e0dfd
+ size 6072
training_loss.png ADDED