Julian von der Goltz committed on
Commit
0295e43
1 Parent(s): e583d3c

Train for 3 epochs

.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint*
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
+ # Default ignored files
+ /shelf/
+ /workspace.xml
+ # Editor-based HTTP Client requests
+ /httpRequests/
+ # Datasource local storage ignored files
+ /dataSources/
+ /dataSources.local.xml
.idea/Mistral-7B-dbnl-v0.1.iml ADDED
@@ -0,0 +1,12 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <module type="PYTHON_MODULE" version="4">
+ <component name="NewModuleRootManager">
+ <content url="file://$MODULE_DIR$" />
+ <orderEntry type="inheritedJdk" />
+ <orderEntry type="sourceFolder" forTests="false" />
+ </component>
+ <component name="PyDocumentationSettings">
+ <option name="format" value="PLAIN" />
+ <option name="myDocStringFormat" value="Plain" />
+ </component>
+ </module>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
+ <component name="InspectionProjectProfileManager">
+ <settings>
+ <option name="USE_PROJECT_PROFILE" value="false" />
+ <version value="1.0" />
+ </settings>
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+ <component name="Black">
+ <option name="sdkName" value="Python 3.10" />
+ </component>
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+ <component name="ProjectModuleManager">
+ <modules>
+ <module fileurl="file://$PROJECT_DIR$/.idea/Mistral-7B-dbnl-v0.1.iml" filepath="$PROJECT_DIR$/.idea/Mistral-7B-dbnl-v0.1.iml" />
+ </modules>
+ </component>
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+ <component name="VcsDirectoryMappings">
+ <mapping directory="" vcs="Git" />
+ </component>
+ </project>
README.md ADDED
@@ -0,0 +1,86 @@
+ ---
+ license: cc0-1.0
+ library_name: peft
+ tags:
+ - llama-factory
+ - lora
+ - generated_from_trainer
+ base_model: mistralai/Mistral-7B-v0.1
+ model-index:
+ - name: Mistral-7B-dbnl-v0.1
+ results: []
+ datasets:
+ - jvdgoltz/dbnl.org-dutch-public-domain
+ language:
+ - nl
+ pipeline_tag: text-generation
+ ---
+
+ # Mistral-7B-dbnl-v0.1
+
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the DBNL Public Domain dataset, featuring Dutch literary texts in the public domain, with a focus on historical texts that are at least 140 years old.
+
+ ## Model description
+
+ Mistral-7B-dbnl-v0.1 is designed to generate and understand Dutch literature, trained on a wide array of historical Dutch texts. The model uses the LoRA (Low-Rank Adaptation) technique for parameter-efficient fine-tuning, keeping the base weights frozen while training low-rank adapter matrices.
+
+ ## Intended uses & limitations
+
+ I mostly created this for fun, cultural learning, and sharing with others.
+
+ This model can be used by researchers, historians, and natural language processing practitioners interested in Dutch literature, historical text analysis, and language modeling, for tasks such as text generation.
+
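+ A minimal inference sketch with Transformers and PEFT; the adapter repo id `jvdgoltz/Mistral-7B-dbnl-v0.1` and the Dutch prompt are illustrative assumptions:
+
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from peft import PeftModel
+
+ # Load the frozen base model, then attach the LoRA adapter on top.
+ base = AutoModelForCausalLM.from_pretrained(
+     "mistralai/Mistral-7B-v0.1", device_map="auto"
+ )
+ model = PeftModel.from_pretrained(base, "jvdgoltz/Mistral-7B-dbnl-v0.1")  # assumed repo id
+ tokenizer = AutoTokenizer.from_pretrained("jvdgoltz/Mistral-7B-dbnl-v0.1")
+
+ # Sample a continuation in the style of historical Dutch prose.
+ inputs = tokenizer("Het was een donkere winternacht,", return_tensors="pt").to(model.device)
+ outputs = model.generate(**inputs, max_new_tokens=100, do_sample=True, temperature=0.8)
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+ ```
+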
+ ### Limitations
+ - The model is trained on historical texts, which may contain biases and outdated language that do not reflect current norms or values.
+ - The model's performance and relevance may be limited to the context of Dutch literature and historical texts.
+
+ ## Training and evaluation data
+
+ The model was trained on the DBNL Public Domain dataset, which includes a variety of texts such as books, poems, songs, and other documents, providing a rich source of linguistic and cultural heritage.
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training (a `TrainingArguments` sketch follows the list):
+ - learning_rate: 5e-05
+ - train_batch_size: 1
+ - eval_batch_size: 8
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 2
+ - gradient_accumulation_steps: 8
+ - total_train_batch_size: 16
+ - total_eval_batch_size: 16
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_steps: 2000
+ - num_epochs: 3.0
+
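+ The same settings expressed as a Hugging Face `TrainingArguments` sketch; this is a reconstruction, not the original launch script, and the output directory name is illustrative:
+
+ ```python
+ from transformers import TrainingArguments
+
+ args = TrainingArguments(
+     output_dir="Mistral-7B-dbnl-v0.1",  # illustrative
+     learning_rate=5e-5,
+     per_device_train_batch_size=1,      # 1 x 2 GPUs x 8 accumulation steps = 16 effective
+     per_device_eval_batch_size=8,
+     gradient_accumulation_steps=8,
+     lr_scheduler_type="cosine",
+     warmup_steps=2000,
+     num_train_epochs=3.0,
+     seed=42,
+ )
+ ```
+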
+ ### Adapter configuration
+
+ The model uses LoRA with the following configuration:
+ - lora_alpha: 2048
+ - r: 1024
+ - lora_dropout: 0.0
+ - inference_mode: true
+ - init_lora_weights: true
+ - peft_type: "LORA"
+ - target_modules: ["q_proj", "v_proj", "up_proj", "o_proj", "k_proj", "gate_proj"]
+ - task_type: "CAUSAL_LM"
+
+ This configuration adapts the pre-trained attention and MLP projection layers specifically for causal language modeling while keeping the base weights frozen.
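+
+ The equivalent `peft.LoraConfig`, sketched from `adapter_config.json` in this repository:
+
+ ```python
+ from peft import LoraConfig, TaskType
+
+ lora_config = LoraConfig(
+     r=1024,            # high rank; the scaling factor lora_alpha / r = 2
+     lora_alpha=2048,
+     lora_dropout=0.0,
+     target_modules=["q_proj", "v_proj", "up_proj", "o_proj", "k_proj", "gate_proj"],
+     task_type=TaskType.CAUSAL_LM,
+ )
+ ```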
+
+ ### Training results
+
+ ![Training loss](./training_loss.png)
+
+ ### Framework versions
+
+ - PEFT 0.7.1
+ - Transformers 4.37.1
+ - Pytorch 2.1.1+cu121
+ - Datasets 2.16.1
+ - Tokenizers 0.15.1
+
+ The model is an example of applying modern, parameter-efficient NLP techniques to historical texts, and offers a resource for exploring Dutch literature and linguistics.
adapter_config.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 2048,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 1024,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj",
+ "up_proj",
+ "o_proj",
+ "k_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
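This adapter configuration can be inspected directly with PEFT; a small sketch, assuming this repository's id is `jvdgoltz/Mistral-7B-dbnl-v0.1`:

```python
from peft import PeftConfig

# Downloads adapter_config.json and parses it into a LoraConfig.
config = PeftConfig.from_pretrained("jvdgoltz/Mistral-7B-dbnl-v0.1")  # assumed repo id
print(config.peft_type, config.r, config.lora_alpha, config.target_modules)
```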
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2473967aefe07d3899a45fe4ed0252610d27fc499d6f56e7b47f3179df439ea0
+ size 4160803368
all_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "epoch": 3.0,
+ "train_loss": 1.664952250687868,
+ "train_runtime": 336340.7542,
+ "train_samples_per_second": 1.55,
+ "train_steps_per_second": 0.097
+ }
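For scale, these figures imply roughly 93 hours of training; a back-of-the-envelope check in Python (arithmetic on the reported numbers only, no new measurements):

```python
runtime_s = 336340.7542
print(runtime_s / 3600)    # ~93.4 hours of training
print(0.097 * runtime_s)   # ~32,625 optimizer steps
print(1.55 * runtime_s)    # ~521,328 training samples seen over 3 epochs
```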
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "</s>",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "split_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+ }
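Note that no dedicated padding token is defined: `</s>` (the EOS token) doubles as the pad token, with right-side padding. A minimal loading sketch, assuming this repository's id is `jvdgoltz/Mistral-7B-dbnl-v0.1`:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("jvdgoltz/Mistral-7B-dbnl-v0.1")  # assumed repo id
print(tokenizer.pad_token, tokenizer.eos_token)  # both "</s>"
print(tokenizer.padding_side)                    # "right"
```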
train_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "epoch": 3.0,
+ "train_loss": 1.664952250687868,
+ "train_runtime": 336340.7542,
+ "train_samples_per_second": 1.55,
+ "train_steps_per_second": 0.097
+ }
trainer_log.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3c37bf64d34962f45284b26b58825387a8865250d89b678f2b00af67c42e0dfd
+ size 6072
training_loss.png ADDED