Leonardo Yoshida committed on
Commit
54f5ec9
1 Parent(s): 27c8c74

feat: add initial trained model and scripts

Browse files

* Pretrained a model;
* Add a training script with docker;
* Add a test script to try the model;

.dockerignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv
README.md ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ tags:
4
+ - grammar
5
+ - spell
6
+ - correction
7
+ ---
8
+
9
+ # Spellcheck Model
10
+
11
+ ## Installation
12
+ 1. Install Python 3.8 or higher and Docker;
13
+ 2. Clone the repo;
14
+ 3. Create a virtual environment: `python3 -m venv .venv`;
15
+ 4. Activate the virtual environment: `source .venv/bin/activate`;
16
+ 5. Install dependencies: `pip install -r requirements.txt`;
17
+
18
+
19
+
20
+ ## Usage
21
+ ### Training
22
+ There's a `data` directory with two files:
23
+ 1. `eval.csv`: evaluation dataset;
24
+ 2. `train.csv`: training dataset;
25
+ Both are in the same format:
26
+ ```txt
27
+ input,target
28
+ "grammar: som sentense","some sentence"
29
+ "grammar: anoder centence","another sentence"
30
+ ...
31
+ ```
32
+
33
+ You can edit them to use your own dataset.
34
+
35
+ Now if you want to fine-tune the existing model here, just run the following commands:
36
+ ```bash
37
+ docker build -t spellcheck_train .
38
+ docker run -v $(pwd):/app spellcheck_train
39
+ ```
40
+ Also make sure you have docker running on your machine.
41
+ ### Testing
42
+ If you want to test your model, just run the following command:
43
+ ```bash
44
+ python test.py
45
+ ```
config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": ".",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "d_ff": 3072,
7
+ "d_kv": 64,
8
+ "d_model": 768,
9
+ "decoder_start_token_id": 0,
10
+ "dense_act_fn": "relu",
11
+ "dropout_rate": 0.1,
12
+ "eos_token_id": 1,
13
+ "feed_forward_proj": "relu",
14
+ "gradient_checkpointing": false,
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 12,
22
+ "num_heads": 12,
23
+ "num_layers": 12,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "task_specific_params": {
29
+ "summarization": {
30
+ "early_stopping": true,
31
+ "length_penalty": 2.0,
32
+ "max_length": 200,
33
+ "min_length": 30,
34
+ "no_repeat_ngram_size": 3,
35
+ "num_beams": 4,
36
+ "prefix": "summarize: "
37
+ },
38
+ "translation_en_to_de": {
39
+ "early_stopping": true,
40
+ "max_length": 300,
41
+ "num_beams": 4,
42
+ "prefix": "translate English to German: "
43
+ },
44
+ "translation_en_to_fr": {
45
+ "early_stopping": true,
46
+ "max_length": 300,
47
+ "num_beams": 4,
48
+ "prefix": "translate English to French: "
49
+ },
50
+ "translation_en_to_ro": {
51
+ "early_stopping": true,
52
+ "max_length": 300,
53
+ "num_beams": 4,
54
+ "prefix": "translate English to Romanian: "
55
+ }
56
+ },
57
+ "torch_dtype": "float32",
58
+ "transformers_version": "4.30.2",
59
+ "use_cache": true,
60
+ "vocab_size": 32128
61
+ }
data/eval.csv ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ input,target
2
+ "grammar: I luv goin for lng waks in the prk.","I love going for long walks in the park."
3
+ "grammar: The quik brown fox jumps ovr the lzy dg.","The quick brown fox jumps over the lazy dog."
4
+ "grammar: I enjoye playing the pino in my fre time.","I enjoy playing the piano in my free time."
5
+ "grammar: The sun rises in the est and sets in the west.","The sun rises in the east and sets in the west."
6
+ "grammar: I lik to reed boks in my spair time.","I like to read books in my spare time."
7
+ "grammar: The cat is siting on the mat.","The cat is sitting on the mat."
8
+ "grammar: I hav a pashun for fotografi.","I have a passion for photography."
9
+ "grammar: The Erth revolvs around the Son.","The Earth revolves around the Sun."
10
+ "grammar: I lik to watch moovies on weekendz.","I like to watch movies on weekends."
11
+ "grammar: The rane is poring outside.","The rain is pouring outside."
12
+ "grammar: I have a lot of work to do todai.","I have a lot of work to do today."
13
+ "grammar: The mun shines brite in the nite sky.","The moon shines brightly in the night sky."
14
+ "grammar: I enjoye spnding time with my famili.","I enjoy spending time with my family."
15
+ "grammar: The coffe is brewing in the kichen.","The coffee is brewing in the kitchen."
16
+ "grammar: I lik to go swiming in the sumer.","I like to go swimming in the summer."
17
+ "grammar: The birds are chirpin in the trez.","The birds are chirping in the trees."
18
+ "grammar: I am studyng compute science at universiti.","I am studying computer science at university."
19
+ "grammar: The car is parkd in the garaje.","The car is parked in the garage."
20
+ "grammar: I luv eating icecrem on hot dais.","I love eating ice cream on hot days."
21
+ "grammar: The flowers are blooming in the gardn.","The flowers are blooming in the garden."
22
+ "grammar: I enjoy hikng in the mountins.","I enjoy hiking in the mountains."
23
+ "grammar: The air is fresh and cris in the morning.","The air is fresh and crisp in the morning."
24
+ "grammar: I lik to trvl and explore new places.","I like to travel and explore new places."
25
+ "grammar: The clok is tikin on the wal.","The clock is ticking on the wall."
26
+ "grammar: I have a meting at 2 PM.","I have a meeting at 2 PM."
27
+ "grammar: The children are playin in the playgrund.","The children are playing in the playground."
28
+ "grammar: I enjoye coking and trying new recipz.","I enjoy cooking and trying new recipes."
29
+ "grammar: The bus is ariving at the stacion.","The bus is arriving at the station."
30
+ "grammar: I lik to go for a run in the evening.","I like to go for a run in the evening."
31
+ "grammar: The wavs are crashin on the shore.","The waves are crashing on the shore."
data/train.csv ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ input,target
2
+ "grammar: they fought a deadly wear","they fought a deadly war"
3
+ "grammar: thier house was father away from my place","their house was farther away from my place"
4
+ "grammar: So I think we would not be live if our ancestors did not develop siences and tecnologies .","So I think we would not be alive if our ancestors did not develop sciences and technologies ."
5
+ "grammar: Imagine yourself you are working in factory just to do one thing like put air a on car if they fire you you will be destroyed , becouse you do n't know more than to put air a in car .","Imagine yourself you are working in factory just to do one thing like put air a on car if they fire you you will be destroyed , because you do n't know more than to put air a in car ."
6
+ "grammar: For example , they can play football whenever they want , but the olders can not .","For example , they can play football whenever they want , but the elders can not ."
7
+ "grammar: While It is true that consumers preffer to buy products with lower prices , when international companies that are already certified begin to send their products to market , people will preffer to consume those goods because the difference in price will probbably not affect them too much .","While It is true that consumers prefer to buy products with lower prices , when international companies that are already certified begin to send their products to market , people will prefer to consume those goods because the difference in price will probably not affect them too much ."
8
+ "grammar: And young people spend more time on ther lifestyles .","And young people spend more time on their lifestyles ."
9
+ "grammar: Students can focus on only a few subjects they are intwerested in and they will become experts in those areas .","Students can focus on only a few subjects they are interested in and they will become experts in those areas ."
10
+ "grammar: He thinks differently than others and he has succeded .","He thinks differently than others and he has succeeded ."
11
+ "grammar: These activities make the community a better place to live and include these values in all the members .","These activities make the community a better place to live and include those values in all the members ."
12
+ "grammar: all the broad knowledge helps they to understand their major in university classes , as well as help they to make a correct choice in a specialized area .","all the broad knowledge helps them to understand their major in university classes , as well as help them to make a correct choice in a specialized area ."
13
+ "grammar: I could very easily understand why he would always brang bad marks home .","I could very easily understand why he would always bring bad marks home ."
14
+ "grammar: Then , when we went to the Science Olympiad , she had diverse knowledge about the subects , and it was not new for her to think about new things .","Then , when we went to the Science Olympiad , she had diverse knowledge about the subjects , and it was not new for her to think about new things ."
15
+ "grammar: If every person tries to learn and understand lots of scientifc subjects , no person will do it and , as a result of this , no science will be improved .","If every person tries to learn and understand lots of scientific subjects , no person will do it and , as a result of this , no science will be improved ."
16
+ "grammar: they will run out very soon at the current rate of utilisation .","they will run out very soon at the current rate of utilization ."
17
+ "grammar: and it will put your maind into non-stop learning .","and it will put your mind into non-stop learning ."
18
+ "grammar: In today 's world Compuer skills are the first important life skill .","In today 's world Computer skills are the first important life skill ."
19
+ "grammar: I have never stopped myself to think this , but this is a real possibility for the fucture .","I have never stopped myself to think this , but this is a real possibility for the future ."
20
+ "grammar: If we conseder an idea as a totally autonomous concept within the individual process of defining reality , I think the ideas are useless , and not based on experience .","If we consider an idea as a totally autonomous concept within the individual process of defining reality , I think the ideas are useless , and not based on experience ."
21
+ "grammar: If there are specialized docters that have done the operation often , he becomes talented in his job .","If there are specialized doctors that have done the operation often , he becomes talented in his job ."
22
+ "grammar: It will be good situstion .","It will be good situation ."
23
+ "grammar: the school teachers there are the ones who create the future of the younger genaration so we have to teach them better .","the school teachers there are the ones who create the future of the younger generation so we have to teach them better ."
24
+ "grammar: some tour guide will want to set maximum security to make the tour difficult because you will only have a wonderful view throught the bus .","some tour guide will want to set maximum security to make the tour difficult because you will only have a wonderful view through the bus ."
25
+ "grammar: this will effect exams .","this will affect exams ."
26
+ "grammar: But on the other hand ther are also people that are often convinced by the interesting advertisements they see everywhere , because of this many times they have to face problems with the product that they bought and many times ther is no way to give back that product .","But on the other hand there are also people that are often convinced by the interesting advertisements they see everywhere , because of this many times they have to face problems with the product that they bought and many times there is no way to give back that product ."
27
+ "grammar: Ever increasing competancy rates force them into frequent business model changes for a compatible transitional flexibility .","Ever increasing competency rates force them into frequent business model changes for a compatible transitional flexibility ."
28
+ "grammar: However , this reading passage casts douts on the speaker 's mention .","However , this reading passage casts doubts on the speaker 's mention ."
29
+ "grammar: For example , in the 2 0 0 6 world cup form Germany , many coaches on a work .","For example , in the 2 0 0 6 world cup form Germany , many coaches on a team ."
30
+ "grammar: Many Scientists obtained clear results of investigations after the facts were on the table , but before they could even begin to theorise about them .","Many Scientists obtained clear results of investigations after the facts were on the table , but before they could even begin to theorize about them ."
31
+ "grammar: They want to make people understend thet their product is the best and you can really trust it .","They want to make people understand that their product is the best and you can really trust it ."
32
+ "grammar: Video is convenient , but if teachers are concerned about students , using texetbooks is a good ability for students .","Video is convenient , but if teachers are concerned about students , using textbooks is a good ability for students ."
33
+ "grammar: In my opinion , it is dependend on a particular person .","In my opinion , it is dependent on a particular person ."
34
+ "grammar: for example when we talk about speed they must anderstand why it is dangerous it is beteer than if they have an accident and learn after that .","for example when we talk about speed they must understand why it is dangerous it is better than if they have an accident and learn after that ."
35
+ "grammar: Therefore peaple will be able to live with the automobile sosiety and nature for the future .","Therefore people will be able to live with the automobile society and nature for the future ."
36
+ "grammar: finally , the third piece of evidence that birds use a type of internal compass is that birds have crystals of the mineral magetite embedded in their break .","finally , the third piece of evidence that birds use a type of internal compass is that birds have crystals of the mineral magnetite embedded in their break ."
37
+ "grammar: The lecture says it is more important to provide enough fish to the people .","The lecturer says it is more important to provide enough fish to the people ."
38
+ "grammar: Futhermore , a tour guide will also provide safety and security for travel , since they already know the dos and don'ts of the tour .","Furthermore , a tour guide will also provide safety and security for travel , since they already know the dos and don'ts of the tour ."
39
+ "grammar: Which caused her situation to worse .","Which caused her situation to worsen ."
40
+ "grammar: This arguement is not only true now , it has always been .","This argument is not only true now , it has always been ."
41
+ "grammar: The Last thing they have to study is disease , which means they will be save .","The Last thing they have to study is disease , which means they will be safe ."
42
+ "grammar: There are very successful politicians that have never tried somthing new .","There are very successful politicians that have never tried something new ."
dockerfile ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ FROM python:3.9
2
+ WORKDIR /app
3
+ COPY . .
4
+ RUN pip install --no-cache-dir -r requirements.txt
5
+ CMD ["python", "train.py"]
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.30.2"
7
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fbfd83d5242b7be46067a12c862a3c9345cff788fb38dfa329bc4620761f721
3
+ size 891699345
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ accelerate==0.20.3
2
+ happytransformer==2.4.1
special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
test.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from happytransformer import HappyTextToText, TTSettings
2
+
3
+ input_text = input('Enter your mispelled text: ')
4
+ happy_tt = HappyTextToText('T5', load_path='.')
5
+
6
+ beam_settings = TTSettings(num_beams=5, min_length=1, max_length=100)
7
+
8
+ output_text_1 = happy_tt.generate_text(
9
+ f'grammar: {input_text}',
10
+ args=beam_settings,
11
+ )
12
+ print(output_text_1.text)
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "clean_up_tokenization_spaces": true,
105
+ "eos_token": "</s>",
106
+ "extra_ids": 100,
107
+ "model_max_length": 512,
108
+ "pad_token": "<pad>",
109
+ "tokenizer_class": "T5Tokenizer",
110
+ "unk_token": "<unk>"
111
+ }
train.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from happytransformer import HappyTextToText, TTTrainArgs
2
+
3
+ happy_tt = HappyTextToText("T5", load_path=".")
4
+
5
+ args = TTTrainArgs(batch_size=8)
6
+ happy_tt.train("data/train.csv", args=args)
7
+
8
+ before_loss = happy_tt.eval("data/eval.csv")
9
+ print("After loss: ", before_loss.loss)
10
+
11
+ happy_tt.save('.')