malteos
committed on
Commit
•
df8b4a1
1
Parent(s):
7a70b4f
added checkpoint
Browse files- README.md +61 -0
- layer_01-model_00-model_states.pt +3 -0
- layer_03-model_00-model_states.pt +3 -0
- layer_04-model_00-model_states.pt +3 -0
- layer_05-model_00-model_states.pt +3 -0
- layer_06-model_00-model_states.pt +3 -0
- layer_07-model_00-model_states.pt +3 -0
- layer_08-model_00-model_states.pt +3 -0
- layer_09-model_00-model_states.pt +3 -0
- layer_10-model_00-model_states.pt +3 -0
- layer_11-model_00-model_states.pt +3 -0
- layer_12-model_00-model_states.pt +3 -0
- layer_13-model_00-model_states.pt +3 -0
- layer_14-model_00-model_states.pt +3 -0
- layer_16-model_00-model_states.pt +3 -0
- mp_rank_00_model_states.pt +3 -0
- zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
README.md
CHANGED
@@ -1,3 +1,64 @@
|
|
1 |
---
|
|
|
2 |
license: mit
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
language: [bg, cs, da, de, el, en, es, et, fi, fr, ga, hr, hu, it, lt, lv, mt, nl, pl, pt, ro, sk, sl, sv, uk]
|
3 |
license: mit
|
4 |
---
|
5 |
+
|
6 |
+
# EuroGPT2
|
7 |
+
|
8 |
+
**NOTE: THIS IS THE ORIGINAL MEGATRON-DEEPSPEED CHECKPOINT INCLUDING OPTIMIZER STATES**
|
9 |
+
|
10 |
+
A GPT2 language model for European languages (EU-24 + Ukrainian).
|
11 |
+
The model follows the same architecture as [OpenAI's GPT2](https://huggingface.co/gpt2/), except that it uses [rotary](https://arxiv.org/abs/2104.09864) positional embeddings instead of learned ones.
|
12 |
+
|
13 |
+
## Model settings
|
14 |
+
|
15 |
+
- parameters: 124M
|
16 |
+
- number of layers: 12
|
17 |
+
- hidden size: 768
|
18 |
+
- number of heads: 12
|
19 |
+
- sequence length: 1024
|
20 |
+
- batch size: 168
|
21 |
+
- test PPL after training: 23.6 (steps: 436,940)
|
22 |
+
|
23 |
+
## Training data
|
24 |
+
|
25 |
+
- [Wikimedia dumps](https://dumps.wikimedia.org/) (Wikipedia, Wikinews, Wikibooks, Wikisource, Wikivoyage; 20230301)
|
26 |
+
- [EUR-Lex](https://huggingface.co/datasets/joelito/eurlex_resources)
|
27 |
+
- [OSCAR 2023.01](https://huggingface.co/datasets/oscar-corpus/OSCAR-2301)
|
28 |
+
- Tokens: 75,167,662,080
|
29 |
+
|
30 |
+
## Languages
|
31 |
+
|
32 |
+
Included languages: Bulgarian, Czech, Danish, German, Greek, English, Spanish, Estonian, Finnish, French, Irish, Croatian, Hungarian, Italian, Lithuanian, Latvian, Maltese, Dutch, Polish, Portuguese, Romanian, Slovak, Slovenian, Swedish, and Ukrainian.
|
33 |
+
|
34 |
+
| Language | Ratio |
|
35 |
+
| -------- | ------ |
|
36 |
+
| bg | 5,92% |
|
37 |
+
| cs | 4,77% |
|
38 |
+
| da | 2,19% |
|
39 |
+
| de | 7,36% |
|
40 |
+
| el | 8,60% |
|
41 |
+
| en | 10,11% |
|
42 |
+
| es | 6,57% |
|
43 |
+
| et | 1,67% |
|
44 |
+
| fi | 2,70% |
|
45 |
+
| fr | 7,18% |
|
46 |
+
| ga | 0,25% |
|
47 |
+
| hr | 1,09% |
|
48 |
+
| hu | 6,38% |
|
49 |
+
| it | 5,80% |
|
50 |
+
| lt | 2,01% |
|
51 |
+
| lv | 1,76% |
|
52 |
+
| mt | 1,49% |
|
53 |
+
| nl | 5,20% |
|
54 |
+
| pl | 4,82% |
|
55 |
+
| pt | 4,64% |
|
56 |
+
| ro | 2,93% |
|
57 |
+
| sk | 2,03% |
|
58 |
+
| sl | 1,54% |
|
59 |
+
| sv | 3,00% |
|
60 |
+
|
61 |
+
## License
|
62 |
+
|
63 |
+
MIT
|
64 |
+
|
layer_01-model_00-model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63693d723a725d89521f988d15fb02b8a0f6f950fdadbf9305f12078568ca1e9
|
3 |
+
size 245760944
|
layer_03-model_00-model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6bc86f5ca59ce7e6ed1672aebaf18b918cb33789b3daf39a023a0c2817a8f4b
|
3 |
+
size 14180439
|
layer_04-model_00-model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cdd83b88ebd4b97eef43f7c104b249f169d55b037064bcff98acd567bde797ce
|
3 |
+
size 14180439
|
layer_05-model_00-model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b4a48681e2207020e5fd259b0ab553f06514cd43a0ce251dbf256b19b54528a
|
3 |
+
size 14180439
|
layer_06-model_00-model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d98ebeee8fb3819bdc860e81a92e854cd5a82ea783271b8a6f20d67b9a932b58
|
3 |
+
size 14180439
|
layer_07-model_00-model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11dfa01b44cdb0f024c427a2aea2809ce2d4f27cb8a93ec04955e8156861bfad
|
3 |
+
size 14180439
|
layer_08-model_00-model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f870fdd7994921e8730d894f1d2a6ee0bd9d7c79db58561d861bc1a134241e51
|
3 |
+
size 14180439
|
layer_09-model_00-model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1b6c1054949dd915124b11863124cabf9f3945186fde8d7e815818eea615ad3
|
3 |
+
size 14180439
|
layer_10-model_00-model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea3d6f83ef9c2f664d3b296b3c6d3590194900c16c637bfd7017d263fe1befb7
|
3 |
+
size 14180439
|
layer_11-model_00-model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb7384d86897a7055e9c4451d2a220f48176a180825396b744f530c8fa7519b0
|
3 |
+
size 14180439
|
layer_12-model_00-model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8454f3493eb304025e825c9ed107b859db7cafe4dfbfd60e5b04bdcf00433644
|
3 |
+
size 14180439
|
layer_13-model_00-model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08f29955c3e3f6bfe42af95b7c2ca1fdbfb05c9a3a7a06be372cd7e2e67a1614
|
3 |
+
size 14180439
|
layer_14-model_00-model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40610079091482676c0aa2a892b7500822f65abba9113fd9cd778f1960499f0c
|
3 |
+
size 14180439
|
layer_16-model_00-model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58ebfeb6e0aade303d4d6c2a6d1ec380285c83d41f552649f3c92a2d1058d8ee
|
3 |
+
size 4291
|
mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e2c9e4dfbaebf178477f324306e42791b366ec0aaaeecafaa4fbad642616194
|
3 |
+
size 34163
|
zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ae9a8e198d8ccfc62d3a2eb19444c269b547b545d835d2d07ef581120b5e9b0
|
3 |
+
size 311908384
|
zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44bd79f0fcddef6decf7053edd377d7ca21e6f863a45442905aa22b064506b2a
|
3 |
+
size 311908384
|
zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddec9b7d663245e91ba78ff527c9561bba947b12ce05a460473bff968c2deab5
|
3 |
+
size 311908384
|
zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63132648b36d4051fe2be22858efe32a46d7a11038e0e3a7ea9e650d12d2283d
|
3 |
+
size 311908384
|
zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57741524bd3f68458a59b7984cb464459861718d285e64b5f737ac7fe72f2036
|
3 |
+
size 311908384
|
zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05bb0e625625304f47b4230fc46349af4ec6b131aa9d97ea4b177ce0db01806d
|
3 |
+
size 311908384
|
zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd79490a89d1884f805e388d03989ca18bcdfc2d60bb70b8cb493168e412a2bb
|
3 |
+
size 311908384
|
zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f23a3266dbc8a815a22d0a80ffb43d1c51dc26c4a85394258559d617e07d0fd
|
3 |
+
size 311908384
|