Upload 6 files

Browse files

Files changed (6) hide show

README.md +42 -0
config.json +33 -0
pytorch_model.bin +3 -0
special_tokens_map.json +1 -0
spiece.model +3 -0
tokenizer_config.json +2 -0

README.md ADDED Viewed

	@@ -0,0 +1,42 @@

+---
+language: ro
+inference: true
+license: apache-2.0
+tags:
+- romanian
+- seq2seq
+- t5
+---
+This is the [mt5-base-romanian](https://huggingface.co/dumitrescustefan/mt5-base-romanian) base model (**390M** parameters), fine-tuned to restore Romanian diacritics.
+The model was fine-tuned on the [romanian diacritics dataset](https://huggingface.co/datasets/dumitrescustefan/diacritic) for 150k steps with a batch size of 8. Both the encoder and the decoder sequence lengths are 256. It was trained with the following [scripts]().
+### How to load the fine-tuned mt5x model
+```python
+from transformers import MT5ForConditionalGeneration, T5Tokenizer
+model = MT5ForConditionalGeneration.from_pretrained('iliemihai/mt5-base-romanian-diacritics')
+tokenizer = T5Tokenizer.from_pretrained('iliemihai/mt5-base-romanian-diacritics')
+input_text = "A inceput sa ii taie un fir de par, iar fata sta in fata, tine camasa de in in mana si canta nota SI."
+inputs = tokenizer(input_text, max_length=256, truncation=True, return_tensors="pt")
+outputs = model.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
+output = tokenizer.decode(outputs[0], skip_special_tokens=True)
+print(output)  # this will print "A început să îi taie un fir de păr, iar fata stă în față, ține cămașa de in în mână și cântă nota SI"
+```
+### Evaluation
+Evaluation will be done soon [here]()
+### Acknowledgements
+We'd like to thank [TPU Research Cloud](https://sites.research.google/trc/about/) for providing the TPUv3 cores we used to train these models!
+### Authors
+Yours truly,
+_[Stefan Dumitrescu](https://github.com/dumitrescustefan), [Mihai Ilie](https://github.com/iliemihai) and [Per Egil Kummervold](https://huggingface.co/north)_

config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "_name_or_path": "./finetuned_mt5-base_150000",
+  "architectures": [
+    "MT5ForConditionalGeneration"
+  ],
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "max_length": 512,
+  "model_type": "mt5",
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "tie_word_embeddings": false,
+  "tokenizer_class": "T5Tokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.23.1",
+  "use_cache": true,
+  "vocab_size": 250112
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e6b377f8bad1a1261c05c16275cfcba0e445c7429bdb1039c7bdcfcd931dd88d
+size 2329696205

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}

spiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
+size 4309802

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 0, "additional_special_tokens": null, "special_tokens_map_file": "/home/patrick/.cache/torch/transformers/685ac0ca8568ec593a48b61b0a3c272beee9bc194a3c7241d15dcadb5f875e53.f76030f3ec1b96a8199b2593390c610e76ca8028ef3d24680000619ffb646276", "tokenizer_file": null, "name_or_path": "google/mt5-small"}
2	+