joanllop commited on
Commit
87ed7e8
·
1 Parent(s): f8df299

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +131 -94
README.md CHANGED
@@ -1,10 +1,17 @@
1
  ---
 
 
 
 
 
2
  tags:
3
- - generated_from_trainer
 
 
4
  datasets:
5
- - /bscdata/data/open_data_26B_tokens_balanced_es_ca/open_data_26B_tokens_balanced_es_ca.py
6
  metrics:
7
- - accuracy
8
  model-index:
9
  - name: falcon_7b_balanced_tokenizer_fp16_CPT_open_data_26B_tokens_balanced_es_ca
10
  results:
@@ -12,24 +19,54 @@ model-index:
12
  name: Causal Language Modeling
13
  type: text-generation
14
  dataset:
15
- name: /bscdata/data/open_data_26B_tokens_balanced_es_ca/open_data_26B_tokens_balanced_es_ca.py
16
- default
17
- type: /bscdata/data/open_data_26B_tokens_balanced_es_ca/open_data_26B_tokens_balanced_es_ca.py
18
  config: default
19
  split: validation
20
  args: default
21
  metrics:
22
- - name: Accuracy
23
- type: accuracy
24
- value: 0.5258444783433934
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  ---
26
 
27
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
28
- should probably proofread and complete it, then remove this comment. -->
29
-
30
  # falcon_7b_balanced_tokenizer_fp16_CPT_open_data_26B_tokens_balanced_es_ca
31
 
32
- This model is a fine-tuned version of [/bscdata/models/falcon_7b_balanced_tokenizer_fp16/](https://huggingface.co//bscdata/models/falcon_7b_balanced_tokenizer_fp16/) on the /bscdata/data/open_data_26B_tokens_balanced_es_ca/open_data_26B_tokens_balanced_es_ca.py default dataset.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  It achieves the following results on the evaluation set:
34
  - Loss: 2.1504
35
  - Accuracy: 0.5258
@@ -654,86 +691,86 @@ The following hyperparameters were used during training:
654
  | 2.1148 | 0.88 | 2925000 | 0.5267 | 2.1445 |
655
  | 2.1188 | 0.88 | 2930000 | 0.5270 | 2.1445 |
656
  | 2.1118 | 0.88 | 2935000 | 0.5270 | 2.1445 |
657
- | 2.1283 | 0.88 | 2940000 | 2.1582 | 0.5244 |
658
- | 2.1336 | 0.88 | 2945000 | 2.1621 | 0.5240 |
659
- | 2.1311 | 0.88 | 2950000 | 2.1621 | 0.5237 |
660
- | 2.1377 | 0.89 | 2955000 | 2.1641 | 0.5236 |
661
- | 2.136 | 0.89 | 2960000 | 2.1641 | 0.5236 |
662
- | 2.1394 | 0.89 | 2965000 | 2.1641 | 0.5233 |
663
- | 2.1405 | 0.89 | 2970000 | 2.1660 | 0.5233 |
664
- | 2.1391 | 0.89 | 2975000 | 2.1660 | 0.5236 |
665
- | 2.1353 | 0.89 | 2980000 | 2.1660 | 0.5234 |
666
- | 2.1392 | 0.89 | 2985000 | 2.1660 | 0.5234 |
667
- | 2.1384 | 0.9 | 2990000 | 2.1660 | 0.5235 |
668
- | 2.1373 | 0.9 | 2995000 | 2.1660 | 0.5233 |
669
- | 2.1346 | 0.9 | 3000000 | 2.1660 | 0.5234 |
670
- | 2.1368 | 0.9 | 3005000 | 2.1660 | 0.5235 |
671
- | 2.1383 | 0.9 | 3010000 | 2.1660 | 0.5233 |
672
- | 2.1447 | 0.9 | 3015000 | 2.1660 | 0.5233 |
673
- | 2.1392 | 0.9 | 3020000 | 2.1660 | 0.5234 |
674
- | 2.1359 | 0.91 | 3025000 | 2.1660 | 0.5233 |
675
- | 2.1408 | 0.91 | 3030000 | 2.1660 | 0.5233 |
676
- | 2.1437 | 0.91 | 3035000 | 2.1660 | 0.5233 |
677
- | 2.1354 | 0.91 | 3040000 | 2.1660 | 0.5233 |
678
- | 2.1371 | 0.91 | 3045000 | 2.1660 | 0.5235 |
679
- | 2.1399 | 0.91 | 3050000 | 2.1660 | 0.5234 |
680
- | 2.1387 | 0.92 | 3055000 | 2.1660 | 0.5234 |
681
- | 2.1406 | 0.92 | 3060000 | 2.1660 | 0.5232 |
682
- | 2.1387 | 0.92 | 3065000 | 2.1660 | 0.5235 |
683
- | 2.1413 | 0.92 | 3070000 | 2.1660 | 0.5235 |
684
- | 2.1371 | 0.92 | 3075000 | 2.1641 | 0.5235 |
685
- | 2.138 | 0.92 | 3080000 | 2.1641 | 0.5235 |
686
- | 2.1385 | 0.92 | 3085000 | 2.1641 | 0.5236 |
687
- | 2.135 | 0.93 | 3090000 | 2.1660 | 0.5234 |
688
- | 2.1401 | 0.93 | 3095000 | 2.1641 | 0.5236 |
689
- | 2.1374 | 0.93 | 3100000 | 2.1641 | 0.5236 |
690
- | 2.1358 | 0.93 | 3105000 | 2.1641 | 0.5237 |
691
- | 2.1344 | 0.93 | 3110000 | 2.1621 | 0.5239 |
692
- | 2.1368 | 0.93 | 3115000 | 2.1621 | 0.5239 |
693
- | 2.1345 | 0.93 | 3120000 | 2.1621 | 0.5237 |
694
- | 2.1358 | 0.94 | 3125000 | 2.1621 | 0.5239 |
695
- | 2.1395 | 0.94 | 3130000 | 2.1621 | 0.5239 |
696
- | 2.1359 | 0.94 | 3135000 | 2.1621 | 0.5243 |
697
- | 2.1373 | 0.94 | 3140000 | 2.1602 | 0.5242 |
698
- | 2.1357 | 0.94 | 3145000 | 2.1602 | 0.5243 |
699
- | 2.1354 | 0.94 | 3150000 | 2.1602 | 0.5244 |
700
- | 2.1323 | 0.95 | 3155000 | 2.1602 | 0.5244 |
701
- | 2.133 | 0.95 | 3160000 | 2.1602 | 0.5242 |
702
- | 2.1315 | 0.95 | 3165000 | 2.1602 | 0.5244 |
703
- | 2.1363 | 0.95 | 3170000 | 2.1602 | 0.5243 |
704
- | 2.1349 | 0.95 | 3175000 | 2.1602 | 0.5245 |
705
- | 2.1336 | 0.95 | 3180000 | 2.1602 | 0.5244 |
706
- | 2.1364 | 0.95 | 3185000 | 2.1582 | 0.5244 |
707
- | 2.133 | 0.96 | 3190000 | 2.1582 | 0.5243 |
708
- | 2.1349 | 0.96 | 3195000 | 2.1582 | 0.5245 |
709
- | 2.134 | 0.96 | 3200000 | 2.1582 | 0.5246 |
710
- | 2.1308 | 0.96 | 3205000 | 2.1562 | 0.5249 |
711
- | 2.1302 | 0.96 | 3210000 | 2.1562 | 0.5247 |
712
- | 2.1302 | 0.96 | 3215000 | 2.1562 | 0.5247 |
713
- | 2.1331 | 0.96 | 3220000 | 2.1562 | 0.5248 |
714
- | 2.1273 | 0.97 | 3225000 | 2.1562 | 0.5247 |
715
- | 2.1286 | 0.97 | 3230000 | 2.1562 | 0.5250 |
716
- | 2.1282 | 0.97 | 3235000 | 2.1543 | 0.5250 |
717
- | 2.1309 | 0.97 | 3240000 | 2.1543 | 0.5251 |
718
- | 2.1295 | 0.97 | 3245000 | 2.1543 | 0.5254 |
719
- | 2.1275 | 0.97 | 3250000 | 2.1543 | 0.5254 |
720
- | 2.133 | 0.98 | 3255000 | 2.1543 | 0.5254 |
721
- | 2.1301 | 0.98 | 3260000 | 2.1543 | 0.5251 |
722
- | 2.1314 | 0.98 | 3265000 | 2.1523 | 0.5253 |
723
- | 2.1258 | 0.98 | 3270000 | 2.1523 | 0.5255 |
724
- | 2.1286 | 0.98 | 3275000 | 2.1523 | 0.5254 |
725
- | 2.1267 | 0.98 | 3280000 | 2.1523 | 0.5254 |
726
- | 2.13 | 0.98 | 3285000 | 2.1523 | 0.5254 |
727
- | 2.1284 | 0.99 | 3290000 | 2.1523 | 0.5255 |
728
- | 2.1295 | 0.99 | 3295000 | 2.1523 | 0.5254 |
729
- | 2.1241 | 0.99 | 3300000 | 2.1523 | 0.5256 |
730
- | 2.1297 | 0.99 | 3305000 | 2.1523 | 0.5258 |
731
- | 2.126 | 0.99 | 3310000 | 2.1504 | 0.5256 |
732
- | 2.1263 | 0.99 | 3315000 | 2.1504 | 0.5256 |
733
- | 2.1273 | 0.99 | 3320000 | 2.1504 | 0.5256 |
734
- | 2.1214 | 1.0 | 3325000 | 2.1504 | 0.5255 |
735
- | 2.1275 | 1.0 | 3330000 | 2.1504 | 0.5256 |
736
- | 2.1227 | 1.0 | 3335000 | 2.1504 | 0.5258 |
737
 
738
 
739
  ### Framework versions
@@ -741,4 +778,4 @@ The following hyperparameters were used during training:
741
  - Transformers 4.30.2
742
  - Pytorch 2.0.0
743
  - Datasets 2.13.1
744
- - Tokenizers 0.13.3
 
1
  ---
2
+ language:
3
+ - en
4
+ - es
5
+ - ca
6
+ licence: apache-2.0
7
  tags:
8
+ - spanish
9
+ - catalan
10
+ - falcon-7b
11
  datasets:
12
+ - BSC-LT/open_data_26B_tokens_balanced_es_ca
13
  metrics:
14
+ - ppl
15
  model-index:
16
  - name: falcon_7b_balanced_tokenizer_fp16_CPT_open_data_26B_tokens_balanced_es_ca
17
  results:
 
19
  name: Causal Language Modeling
20
  type: text-generation
21
  dataset:
22
+ name: BSC-LT/open_data_26B_tokens_balanced_es_ca
23
+ type: BSC-LT/open_data_26B_tokens_balanced_es_ca
 
24
  config: default
25
  split: validation
26
  args: default
27
  metrics:
28
+ - name: Perplexity
29
+ type: ppl
30
+ value: 8.59
31
+ widget:
32
+ - text: |-
33
+ Respon a la pregunta següent.
34
+ Pregunta: "Qui viu a França?"
35
+ Resposta: "A França viuen els francesos."
36
+ ----
37
+ Respon a la pregunta següent.
38
+ Pregunta: "Quina és la capital de Suècia?"
39
+ Resposta: "La capital de Suècia és Estocolm."
40
+ ----
41
+ Respon a la pregunta següent.
42
+ Pregunta: "Quina beguda es consumeix als matins per despertar-se?"
43
+ Resposta: "La majoria de gent consumeix cafè per despertar-se."
44
+ ----
45
+ Respon a la pregunta següent.
46
+ Pregunta: "Qui és Leo Messi?"
47
+ Resposta:
48
+ example_title: Pregunta-Resposta
49
+ license: apache-2.0
50
+ pipeline_tag: text-generation
51
  ---
52
 
 
 
 
53
  # falcon_7b_balanced_tokenizer_fp16_CPT_open_data_26B_tokens_balanced_es_ca
54
 
55
+ ## Overview
56
+
57
+ This model is a new step toward answering a long-standing question: "What is the best strategy for training a model in my language (not English)?"
58
+
59
+ This model adapts [falcon-7b](https://huggingface.co/tiiuae/falcon-7b) to two new target languages, Spanish and Catalan, by swapping the tokenizer and adjusting the embedding layer before training on 26B tokens in the target languages.
60
+
61
+ ## Embedding layer adaptation
62
+
63
+ When adapting a model from English to other languages the tokenizer plays a crucial role.
64
+ In our case the tokenization of a
65
+
66
+ If the tokenizer does not include the target language in its training data, the resulting model will need many more tokens to perform the same task.
67
+ We solve this problem by creating a new tokenizer for the target languages (Spanish and Catalan) and adapting the embedding layer: the embeddings of the tokens shared by both tokenizers are reordered to match the new vocabulary, and the remaining embeddings are initialized to the average of all embeddings.
68
+
69
+ This model is a fine-tuned version of [/bscdata/models/falcon_7b_balanced_tokenizer_fp16/](https://huggingface.co//bscdata/models/falcon_7b_balanced_tokenizer_fp16/) on the /bscdata/data/open_data_26B_tokens_balanced_es_ca/open_data_26B_tokens_balanced_es_ca.py default dataset.
70
  It achieves the following results on the evaluation set:
71
  - Loss: 2.1504
72
  - Accuracy: 0.5258
 
691
  | 2.1148 | 0.88 | 2925000 | 0.5267 | 2.1445 |
692
  | 2.1188 | 0.88 | 2930000 | 0.5270 | 2.1445 |
693
  | 2.1118 | 0.88 | 2935000 | 0.5270 | 2.1445 |
694
+ | 2.1283 | 0.88 | 2940000 | 0.5244 | 2.1582 |
695
+ | 2.1336 | 0.88 | 2945000 | 0.5240 | 2.1621 |
696
+ | 2.1311 | 0.88 | 2950000 | 0.5237 | 2.1621 |
697
+ | 2.1377 | 0.89 | 2955000 | 0.5236 | 2.1641 |
698
+ | 2.136 | 0.89 | 2960000 | 0.5236 | 2.1641 |
699
+ | 2.1394 | 0.89 | 2965000 | 0.5233 | 2.1641 |
700
+ | 2.1405 | 0.89 | 2970000 | 0.5233 | 2.1660 |
701
+ | 2.1391 | 0.89 | 2975000 | 0.5236 | 2.1660 |
702
+ | 2.1353 | 0.89 | 2980000 | 0.5234 | 2.1660 |
703
+ | 2.1392 | 0.89 | 2985000 | 0.5234 | 2.1660 |
704
+ | 2.1384 | 0.9 | 2990000 | 0.5235 | 2.1660 |
705
+ | 2.1373 | 0.9 | 2995000 | 0.5233 | 2.1660 |
706
+ | 2.1346 | 0.9 | 3000000 | 0.5234 | 2.1660 |
707
+ | 2.1368 | 0.9 | 3005000 | 0.5235 | 2.1660 |
708
+ | 2.1383 | 0.9 | 3010000 | 0.5233 | 2.1660 |
709
+ | 2.1447 | 0.9 | 3015000 | 0.5233 | 2.1660 |
710
+ | 2.1392 | 0.9 | 3020000 | 0.5234 | 2.1660 |
711
+ | 2.1359 | 0.91 | 3025000 | 0.5233 | 2.1660 |
712
+ | 2.1408 | 0.91 | 3030000 | 0.5233 | 2.1660 |
713
+ | 2.1437 | 0.91 | 3035000 | 0.5233 | 2.1660 |
714
+ | 2.1354 | 0.91 | 3040000 | 0.5233 | 2.1660 |
715
+ | 2.1371 | 0.91 | 3045000 | 0.5235 | 2.1660 |
716
+ | 2.1399 | 0.91 | 3050000 | 0.5234 | 2.1660 |
717
+ | 2.1387 | 0.92 | 3055000 | 0.5234 | 2.1660 |
718
+ | 2.1406 | 0.92 | 3060000 | 0.5232 | 2.1660 |
719
+ | 2.1387 | 0.92 | 3065000 | 0.5235 | 2.1660 |
720
+ | 2.1413 | 0.92 | 3070000 | 0.5235 | 2.1660 |
721
+ | 2.1371 | 0.92 | 3075000 | 0.5235 | 2.1641 |
722
+ | 2.138 | 0.92 | 3080000 | 0.5235 | 2.1641 |
723
+ | 2.1385 | 0.92 | 3085000 | 0.5236 | 2.1641 |
724
+ | 2.135 | 0.93 | 3090000 | 0.5234 | 2.1660 |
725
+ | 2.1401 | 0.93 | 3095000 | 0.5236 | 2.1641 |
726
+ | 2.1374 | 0.93 | 3100000 | 0.5236 | 2.1641 |
727
+ | 2.1358 | 0.93 | 3105000 | 0.5237 | 2.1641 |
728
+ | 2.1344 | 0.93 | 3110000 | 0.5239 | 2.1621 |
729
+ | 2.1368 | 0.93 | 3115000 | 0.5239 | 2.1621 |
730
+ | 2.1345 | 0.93 | 3120000 | 0.5237 | 2.1621 |
731
+ | 2.1358 | 0.94 | 3125000 | 0.5239 | 2.1621 |
732
+ | 2.1395 | 0.94 | 3130000 | 0.5239 | 2.1621 |
733
+ | 2.1359 | 0.94 | 3135000 | 0.5243 | 2.1621 |
734
+ | 2.1373 | 0.94 | 3140000 | 0.5242 | 2.1602 |
735
+ | 2.1357 | 0.94 | 3145000 | 0.5243 | 2.1602 |
736
+ | 2.1354 | 0.94 | 3150000 | 0.5244 | 2.1602 |
737
+ | 2.1323 | 0.95 | 3155000 | 0.5244 | 2.1602 |
738
+ | 2.133 | 0.95 | 3160000 | 0.5242 | 2.1602 |
739
+ | 2.1315 | 0.95 | 3165000 | 0.5244 | 2.1602 |
740
+ | 2.1363 | 0.95 | 3170000 | 0.5243 | 2.1602 |
741
+ | 2.1349 | 0.95 | 3175000 | 0.5245 | 2.1602 |
742
+ | 2.1336 | 0.95 | 3180000 | 0.5244 | 2.1602 |
743
+ | 2.1364 | 0.95 | 3185000 | 0.5244 | 2.1582 |
744
+ | 2.133 | 0.96 | 3190000 | 0.5243 | 2.1582 |
745
+ | 2.1349 | 0.96 | 3195000 | 0.5245 | 2.1582 |
746
+ | 2.134 | 0.96 | 3200000 | 0.5246 | 2.1582 |
747
+ | 2.1308 | 0.96 | 3205000 | 0.5249 | 2.1562 |
748
+ | 2.1302 | 0.96 | 3210000 | 0.5247 | 2.1562 |
749
+ | 2.1302 | 0.96 | 3215000 | 0.5247 | 2.1562 |
750
+ | 2.1331 | 0.96 | 3220000 | 0.5248 | 2.1562 |
751
+ | 2.1273 | 0.97 | 3225000 | 0.5247 | 2.1562 |
752
+ | 2.1286 | 0.97 | 3230000 | 0.5250 | 2.1562 |
753
+ | 2.1282 | 0.97 | 3235000 | 0.5250 | 2.1543 |
754
+ | 2.1309 | 0.97 | 3240000 | 0.5251 | 2.1543 |
755
+ | 2.1295 | 0.97 | 3245000 | 0.5254 | 2.1543 |
756
+ | 2.1275 | 0.97 | 3250000 | 0.5254 | 2.1543 |
757
+ | 2.133 | 0.98 | 3255000 | 0.5254 | 2.1543 |
758
+ | 2.1301 | 0.98 | 3260000 | 0.5251 | 2.1543 |
759
+ | 2.1314 | 0.98 | 3265000 | 0.5253 | 2.1523 |
760
+ | 2.1258 | 0.98 | 3270000 | 0.5255 | 2.1523 |
761
+ | 2.1286 | 0.98 | 3275000 | 0.5254 | 2.1523 |
762
+ | 2.1267 | 0.98 | 3280000 | 0.5254 | 2.1523 |
763
+ | 2.13 | 0.98 | 3285000 | 0.5254 | 2.1523 |
764
+ | 2.1284 | 0.99 | 3290000 | 0.5255 | 2.1523 |
765
+ | 2.1295 | 0.99 | 3295000 | 0.5254 | 2.1523 |
766
+ | 2.1241 | 0.99 | 3300000 | 0.5256 | 2.1523 |
767
+ | 2.1297 | 0.99 | 3305000 | 0.5258 | 2.1523 |
768
+ | 2.126 | 0.99 | 3310000 | 0.5256 | 2.1504 |
769
+ | 2.1263 | 0.99 | 3315000 | 0.5256 | 2.1504 |
770
+ | 2.1273 | 0.99 | 3320000 | 0.5256 | 2.1504 |
771
+ | 2.1214 | 1.0 | 3325000 | 0.5255 | 2.1504 |
772
+ | 2.1275 | 1.0 | 3330000 | 0.5256 | 2.1504 |
773
+ | 2.1227 | 1.0 | 3335000 | 0.5258 | 2.1504 |
774
 
775
 
776
  ### Framework versions
 
778
  - Transformers 4.30.2
779
  - Pytorch 2.0.0
780
  - Datasets 2.13.1
781
+ - Tokenizers 0.13.3