versae committed on
Commit
20ab046
1 Parent(s): b47ee61

BPE and Unigram SentencePiece tokenizers for French, Italian, Portuguese, Romanian, and Spanish

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. oscar/es-fr-it-pt-ro_16000_bpe.sp.model +3 -0
  2. oscar/es-fr-it-pt-ro_16000_bpe.sp.vocab +0 -0
  3. oscar/es-fr-it-pt-ro_16000_unigram.sp.model +3 -0
  4. oscar/es-fr-it-pt-ro_16000_unigram.sp.vocab +0 -0
  5. oscar/es-fr-it-pt-ro_32000_bpe.sp.model +3 -0
  6. oscar/es-fr-it-pt-ro_32000_bpe.sp.vocab +0 -0
  7. oscar/es-fr-it-pt-ro_32000_unigram.sp.model +3 -0
  8. oscar/es-fr-it-pt-ro_32000_unigram.sp.vocab +0 -0
  9. oscar/es-fr-it-pt-ro_64000_bpe.sp.model +3 -0
  10. oscar/es-fr-it-pt-ro_64000_bpe.sp.vocab +0 -0
  11. oscar/es-fr-it-pt-ro_64000_unigram.sp.model +3 -0
  12. oscar/es-fr-it-pt-ro_64000_unigram.sp.vocab +0 -0
  13. oscar/es_16000_bpe.100extra.sp.model +3 -0
  14. oscar/es_16000_bpe.100extra.sp.vocab +0 -0
  15. oscar/es_16000_bpe.sp.model +3 -0
  16. oscar/es_16000_bpe.sp.vocab +0 -0
  17. oscar/es_16000_unigram.100extra.sp.model +3 -0
  18. oscar/es_16000_unigram.100extra.sp.vocab +0 -0
  19. oscar/es_16000_unigram.sp.model +3 -0
  20. oscar/es_16000_unigram.sp.vocab +0 -0
  21. oscar/es_32000_bpe.100extra.sp.model +3 -0
  22. oscar/es_32000_bpe.100extra.sp.vocab +0 -0
  23. oscar/es_32000_bpe.sp.model +3 -0
  24. oscar/es_32000_bpe.sp.vocab +0 -0
  25. oscar/es_32000_unigram.100extra.sp.model +3 -0
  26. oscar/es_32000_unigram.100extra.sp.vocab +0 -0
  27. oscar/es_32000_unigram.sp.model +3 -0
  28. oscar/es_32000_unigram.sp.vocab +0 -0
  29. oscar/es_64000_bpe.100extra.sp.model +3 -0
  30. oscar/es_64000_bpe.100extra.sp.vocab +0 -0
  31. oscar/es_64000_bpe.sp.model +3 -0
  32. oscar/es_64000_bpe.sp.vocab +0 -0
  33. oscar/es_64000_unigram.100extra.sp.model +3 -0
  34. oscar/es_64000_unigram.100extra.sp.vocab +0 -0
  35. oscar/es_64000_unigram.sp.model +3 -0
  36. oscar/es_64000_unigram.sp.vocab +0 -0
  37. oscar/fr_16000_bpe.100extra.sp.model +3 -0
  38. oscar/fr_16000_bpe.100extra.sp.vocab +0 -0
  39. oscar/fr_16000_bpe.sp.model +3 -0
  40. oscar/fr_16000_bpe.sp.vocab +0 -0
  41. oscar/fr_16000_unigram.100extra.sp.model +3 -0
  42. oscar/fr_16000_unigram.100extra.sp.vocab +0 -0
  43. oscar/fr_16000_unigram.sp.model +3 -0
  44. oscar/fr_16000_unigram.sp.vocab +0 -0
  45. oscar/fr_32000_bpe.100extra.sp.model +3 -0
  46. oscar/fr_32000_bpe.100extra.sp.vocab +0 -0
  47. oscar/fr_32000_bpe.sp.model +3 -0
  48. oscar/fr_32000_bpe.sp.vocab +0 -0
  49. oscar/fr_32000_unigram.100extra.sp.model +3 -0
  50. oscar/fr_32000_unigram.100extra.sp.vocab +0 -0
oscar/es-fr-it-pt-ro_16000_bpe.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af24684abd89fba7ec1f43087d14e37f408cc516939f8d46e78246801229e828
3
+ size 457251
oscar/es-fr-it-pt-ro_16000_bpe.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es-fr-it-pt-ro_16000_unigram.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f3c22b8712cb278df5ebf59f0a6eec729870fb8bf66aa98982b71e2a69692d1
3
+ size 464712
oscar/es-fr-it-pt-ro_16000_unigram.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es-fr-it-pt-ro_32000_bpe.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c3237e9d349896bf6df193eac3c64c4e85cf368e90eeb60a201c76b39f0164f
3
+ size 732190
oscar/es-fr-it-pt-ro_32000_bpe.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es-fr-it-pt-ro_32000_unigram.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71c532c2597a454b58923c5f3d2b7ed88d1a7b2627a500d000caff9b5ac7aaf6
3
+ size 756243
oscar/es-fr-it-pt-ro_32000_unigram.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es-fr-it-pt-ro_64000_bpe.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6692779662fa27b6cfafc601d7e1cde1abd70b92f2f83b074d2727779cd29d15
3
+ size 1309598
oscar/es-fr-it-pt-ro_64000_bpe.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es-fr-it-pt-ro_64000_unigram.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdc0e99f7898055cc14b737434c2b81950f302949b8e5cb831a31e074cad8400
3
+ size 1355270
oscar/es-fr-it-pt-ro_64000_unigram.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es_16000_bpe.100extra.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:928c7ea99e0cf0b5e1d1445924df6df515f92f24564094bb375487395d7f6792
3
+ size 469606
oscar/es_16000_bpe.100extra.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es_16000_bpe.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c73774e85a315f9f8bcc087f7e7630aac7e4e2f4c62fbeaec69f97292827f61
3
+ size 467301
oscar/es_16000_bpe.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es_16000_unigram.100extra.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:275741aa20c610532cf8dbc759f6655c4cc584fffd928ec0a14467ea84f9789a
3
+ size 478205
oscar/es_16000_unigram.100extra.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es_16000_unigram.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab0f15cf46f98c8ea2eddcbfbd5789139e8071ef3b6a1f352cf4a968d676629a
3
+ size 476199
oscar/es_16000_unigram.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es_32000_bpe.100extra.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc2e719cde09f9e758dca8fa858c2f219012174a4bb48b49c1b6f24521223203
3
+ size 752964
oscar/es_32000_bpe.100extra.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es_32000_bpe.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1744278de3646b781273383227567b96142bfffa6868185e436cd7a2a94d9d74
3
+ size 750794
oscar/es_32000_bpe.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es_32000_unigram.100extra.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0406746e2adaf29c1b3dfe2dc50fa44cb29fbddb6f0819e1c057ff278ff26ed
3
+ size 773784
oscar/es_32000_unigram.100extra.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es_32000_unigram.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b138252ec45ba516f8a34f5ccdc41ca838f261911eb8e2c6aedbfb42cc38347a
3
+ size 771797
oscar/es_32000_unigram.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es_64000_bpe.100extra.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebf2c625b0f5823c1a3952e05d3353833699c21a569dec0f269edd5c44f23da7
3
+ size 1332184
oscar/es_64000_bpe.100extra.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es_64000_bpe.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68021b67fae345b742c4148bb17b3ec1f716ed9d1010964a5190c4d9ad8424ee
3
+ size 1330060
oscar/es_64000_bpe.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es_64000_unigram.100extra.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e91e0e13dad1b2333d4911ee424ee88ba27aca918e58b597cd799551ffeb50eb
3
+ size 1371690
oscar/es_64000_unigram.100extra.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/es_64000_unigram.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39c7834e8ac1eed10f8010f39db96a524b3c01f899e153b20911c6e9c3ebd2b8
3
+ size 1369670
oscar/es_64000_unigram.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/fr_16000_bpe.100extra.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8474761127d57c1c103419b272eb26a120881a3fd83fb86b5a1a0bf389dba947
3
+ size 463520
oscar/fr_16000_bpe.100extra.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/fr_16000_bpe.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6483e47dd59cee6c1735c4d7b69d78e61fbbdd689d5adcd11f88786b18e738d9
3
+ size 461284
oscar/fr_16000_bpe.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/fr_16000_unigram.100extra.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bef154647c0f121c5baa82de58c0c6022652869c61348f46eb79c2f99513866
3
+ size 469639
oscar/fr_16000_unigram.100extra.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/fr_16000_unigram.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a86a542a60b50a1b1fffabdc1039bd40b57d0a9ec0829eb9642fd016e5494b01
3
+ size 467551
oscar/fr_16000_unigram.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/fr_32000_bpe.100extra.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34f49e788acc9668dc4ca17d0b2c3500a2af5439723c1cd05850ddfac9263f7f
3
+ size 742691
oscar/fr_32000_bpe.100extra.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/fr_32000_bpe.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28c4343d6c0186da9b7b4d64a68a0985636f402b8014cbe8de9b6ce5b5a57da3
3
+ size 740436
oscar/fr_32000_bpe.sp.vocab ADDED
The diff for this file is too large to render. See raw diff
 
oscar/fr_32000_unigram.100extra.sp.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5efc025b1741e5391afa3ac0cb2cbda41c3ba6075e70e3825afe2c2824f6236a
3
+ size 758909
oscar/fr_32000_unigram.100extra.sp.vocab ADDED
The diff for this file is too large to render. See raw diff