BPE and Unigram SentencePiece tokenizers for French, Italian, Portuguese, Romanian, and Spanish
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- oscar/es-fr-it-pt-ro_16000_bpe.sp.model +3 -0
- oscar/es-fr-it-pt-ro_16000_bpe.sp.vocab +0 -0
- oscar/es-fr-it-pt-ro_16000_unigram.sp.model +3 -0
- oscar/es-fr-it-pt-ro_16000_unigram.sp.vocab +0 -0
- oscar/es-fr-it-pt-ro_32000_bpe.sp.model +3 -0
- oscar/es-fr-it-pt-ro_32000_bpe.sp.vocab +0 -0
- oscar/es-fr-it-pt-ro_32000_unigram.sp.model +3 -0
- oscar/es-fr-it-pt-ro_32000_unigram.sp.vocab +0 -0
- oscar/es-fr-it-pt-ro_64000_bpe.sp.model +3 -0
- oscar/es-fr-it-pt-ro_64000_bpe.sp.vocab +0 -0
- oscar/es-fr-it-pt-ro_64000_unigram.sp.model +3 -0
- oscar/es-fr-it-pt-ro_64000_unigram.sp.vocab +0 -0
- oscar/es_16000_bpe.100extra.sp.model +3 -0
- oscar/es_16000_bpe.100extra.sp.vocab +0 -0
- oscar/es_16000_bpe.sp.model +3 -0
- oscar/es_16000_bpe.sp.vocab +0 -0
- oscar/es_16000_unigram.100extra.sp.model +3 -0
- oscar/es_16000_unigram.100extra.sp.vocab +0 -0
- oscar/es_16000_unigram.sp.model +3 -0
- oscar/es_16000_unigram.sp.vocab +0 -0
- oscar/es_32000_bpe.100extra.sp.model +3 -0
- oscar/es_32000_bpe.100extra.sp.vocab +0 -0
- oscar/es_32000_bpe.sp.model +3 -0
- oscar/es_32000_bpe.sp.vocab +0 -0
- oscar/es_32000_unigram.100extra.sp.model +3 -0
- oscar/es_32000_unigram.100extra.sp.vocab +0 -0
- oscar/es_32000_unigram.sp.model +3 -0
- oscar/es_32000_unigram.sp.vocab +0 -0
- oscar/es_64000_bpe.100extra.sp.model +3 -0
- oscar/es_64000_bpe.100extra.sp.vocab +0 -0
- oscar/es_64000_bpe.sp.model +3 -0
- oscar/es_64000_bpe.sp.vocab +0 -0
- oscar/es_64000_unigram.100extra.sp.model +3 -0
- oscar/es_64000_unigram.100extra.sp.vocab +0 -0
- oscar/es_64000_unigram.sp.model +3 -0
- oscar/es_64000_unigram.sp.vocab +0 -0
- oscar/fr_16000_bpe.100extra.sp.model +3 -0
- oscar/fr_16000_bpe.100extra.sp.vocab +0 -0
- oscar/fr_16000_bpe.sp.model +3 -0
- oscar/fr_16000_bpe.sp.vocab +0 -0
- oscar/fr_16000_unigram.100extra.sp.model +3 -0
- oscar/fr_16000_unigram.100extra.sp.vocab +0 -0
- oscar/fr_16000_unigram.sp.model +3 -0
- oscar/fr_16000_unigram.sp.vocab +0 -0
- oscar/fr_32000_bpe.100extra.sp.model +3 -0
- oscar/fr_32000_bpe.100extra.sp.vocab +0 -0
- oscar/fr_32000_bpe.sp.model +3 -0
- oscar/fr_32000_bpe.sp.vocab +0 -0
- oscar/fr_32000_unigram.100extra.sp.model +3 -0
- oscar/fr_32000_unigram.100extra.sp.vocab +0 -0
oscar/es-fr-it-pt-ro_16000_bpe.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af24684abd89fba7ec1f43087d14e37f408cc516939f8d46e78246801229e828
|
3 |
+
size 457251
|
oscar/es-fr-it-pt-ro_16000_bpe.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es-fr-it-pt-ro_16000_unigram.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f3c22b8712cb278df5ebf59f0a6eec729870fb8bf66aa98982b71e2a69692d1
|
3 |
+
size 464712
|
oscar/es-fr-it-pt-ro_16000_unigram.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es-fr-it-pt-ro_32000_bpe.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c3237e9d349896bf6df193eac3c64c4e85cf368e90eeb60a201c76b39f0164f
|
3 |
+
size 732190
|
oscar/es-fr-it-pt-ro_32000_bpe.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es-fr-it-pt-ro_32000_unigram.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71c532c2597a454b58923c5f3d2b7ed88d1a7b2627a500d000caff9b5ac7aaf6
|
3 |
+
size 756243
|
oscar/es-fr-it-pt-ro_32000_unigram.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es-fr-it-pt-ro_64000_bpe.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6692779662fa27b6cfafc601d7e1cde1abd70b92f2f83b074d2727779cd29d15
|
3 |
+
size 1309598
|
oscar/es-fr-it-pt-ro_64000_bpe.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es-fr-it-pt-ro_64000_unigram.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdc0e99f7898055cc14b737434c2b81950f302949b8e5cb831a31e074cad8400
|
3 |
+
size 1355270
|
oscar/es-fr-it-pt-ro_64000_unigram.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es_16000_bpe.100extra.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:928c7ea99e0cf0b5e1d1445924df6df515f92f24564094bb375487395d7f6792
|
3 |
+
size 469606
|
oscar/es_16000_bpe.100extra.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es_16000_bpe.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c73774e85a315f9f8bcc087f7e7630aac7e4e2f4c62fbeaec69f97292827f61
|
3 |
+
size 467301
|
oscar/es_16000_bpe.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es_16000_unigram.100extra.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:275741aa20c610532cf8dbc759f6655c4cc584fffd928ec0a14467ea84f9789a
|
3 |
+
size 478205
|
oscar/es_16000_unigram.100extra.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es_16000_unigram.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab0f15cf46f98c8ea2eddcbfbd5789139e8071ef3b6a1f352cf4a968d676629a
|
3 |
+
size 476199
|
oscar/es_16000_unigram.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es_32000_bpe.100extra.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc2e719cde09f9e758dca8fa858c2f219012174a4bb48b49c1b6f24521223203
|
3 |
+
size 752964
|
oscar/es_32000_bpe.100extra.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es_32000_bpe.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1744278de3646b781273383227567b96142bfffa6868185e436cd7a2a94d9d74
|
3 |
+
size 750794
|
oscar/es_32000_bpe.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es_32000_unigram.100extra.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0406746e2adaf29c1b3dfe2dc50fa44cb29fbddb6f0819e1c057ff278ff26ed
|
3 |
+
size 773784
|
oscar/es_32000_unigram.100extra.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es_32000_unigram.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b138252ec45ba516f8a34f5ccdc41ca838f261911eb8e2c6aedbfb42cc38347a
|
3 |
+
size 771797
|
oscar/es_32000_unigram.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es_64000_bpe.100extra.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebf2c625b0f5823c1a3952e05d3353833699c21a569dec0f269edd5c44f23da7
|
3 |
+
size 1332184
|
oscar/es_64000_bpe.100extra.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es_64000_bpe.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68021b67fae345b742c4148bb17b3ec1f716ed9d1010964a5190c4d9ad8424ee
|
3 |
+
size 1330060
|
oscar/es_64000_bpe.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es_64000_unigram.100extra.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e91e0e13dad1b2333d4911ee424ee88ba27aca918e58b597cd799551ffeb50eb
|
3 |
+
size 1371690
|
oscar/es_64000_unigram.100extra.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/es_64000_unigram.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39c7834e8ac1eed10f8010f39db96a524b3c01f899e153b20911c6e9c3ebd2b8
|
3 |
+
size 1369670
|
oscar/es_64000_unigram.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/fr_16000_bpe.100extra.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8474761127d57c1c103419b272eb26a120881a3fd83fb86b5a1a0bf389dba947
|
3 |
+
size 463520
|
oscar/fr_16000_bpe.100extra.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/fr_16000_bpe.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6483e47dd59cee6c1735c4d7b69d78e61fbbdd689d5adcd11f88786b18e738d9
|
3 |
+
size 461284
|
oscar/fr_16000_bpe.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/fr_16000_unigram.100extra.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4bef154647c0f121c5baa82de58c0c6022652869c61348f46eb79c2f99513866
|
3 |
+
size 469639
|
oscar/fr_16000_unigram.100extra.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/fr_16000_unigram.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a86a542a60b50a1b1fffabdc1039bd40b57d0a9ec0829eb9642fd016e5494b01
|
3 |
+
size 467551
|
oscar/fr_16000_unigram.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/fr_32000_bpe.100extra.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34f49e788acc9668dc4ca17d0b2c3500a2af5439723c1cd05850ddfac9263f7f
|
3 |
+
size 742691
|
oscar/fr_32000_bpe.100extra.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/fr_32000_bpe.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28c4343d6c0186da9b7b4d64a68a0985636f402b8014cbe8de9b6ce5b5a57da3
|
3 |
+
size 740436
|
oscar/fr_32000_bpe.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|
oscar/fr_32000_unigram.100extra.sp.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5efc025b1741e5391afa3ac0cb2cbda41c3ba6075e70e3825afe2c2824f6236a
|
3 |
+
size 758909
|
oscar/fr_32000_unigram.100extra.sp.vocab
ADDED
The diff for this file is too large to render.
See raw diff
|
|