willwade committed on
Commit
5d53663
1 Parent(s): 9d13fea

retrained with new data from opensubs. qwerty subs

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "finetuned-model/",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
 
1
  {
2
+ "_name_or_path": "t5-small",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
dataset_dict.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"splits": ["train", "eval"]}
eval/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e3914c2653952d5ef669d4dec5cfa59bd7587d0999b3038929d02ef51c3f3f7
3
+ size 187010360
eval/dataset_info.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "csv",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "csv",
6
+ "dataset_size": 472235292,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "/home/ubuntu/wwdrive2/14March/clean_train.csv": {
10
+ "num_bytes": 369392984,
11
+ "checksum": null
12
+ },
13
+ "/home/ubuntu/wwdrive2/14March/clean_eval.csv": {
14
+ "num_bytes": 71786407,
15
+ "checksum": null
16
+ }
17
+ },
18
+ "download_size": 441179391,
19
+ "features": {
20
+ "input_ids": {
21
+ "feature": {
22
+ "dtype": "int32",
23
+ "_type": "Value"
24
+ },
25
+ "_type": "Sequence"
26
+ },
27
+ "attention_mask": {
28
+ "feature": {
29
+ "dtype": "int8",
30
+ "_type": "Value"
31
+ },
32
+ "_type": "Sequence"
33
+ },
34
+ "labels": {
35
+ "feature": {
36
+ "dtype": "int64",
37
+ "_type": "Value"
38
+ },
39
+ "_type": "Sequence"
40
+ }
41
+ },
42
+ "homepage": "",
43
+ "license": "",
44
+ "size_in_bytes": 913414683,
45
+ "splits": {
46
+ "train": {
47
+ "name": "train",
48
+ "num_bytes": 394316099,
49
+ "num_examples": 4507525,
50
+ "dataset_name": "csv"
51
+ },
52
+ "eval": {
53
+ "name": "eval",
54
+ "num_bytes": 77919193,
55
+ "num_examples": 1127410,
56
+ "dataset_name": "csv"
57
+ }
58
+ },
59
+ "version": {
60
+ "version_str": "0.0.0",
61
+ "major": 0,
62
+ "minor": 0,
63
+ "patch": 0
64
+ }
65
+ }
eval/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "150697196cb85c2e",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "eval"
13
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7570f8c76e695d91c713c25c55aed0bc65343df9f4b9aa20cf3f595ff6114e32
3
  size 241984552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35a20c7f6786661d16cdff57b5432726582b676eb3c02e5f7a869c58db2dd3de
3
  size 241984552
tokenizer_config.json CHANGED
@@ -930,12 +930,8 @@
930
  "clean_up_tokenization_spaces": true,
931
  "eos_token": "</s>",
932
  "extra_ids": 100,
933
- "max_length": 512,
934
  "model_max_length": 512,
935
  "pad_token": "<pad>",
936
- "stride": 0,
937
  "tokenizer_class": "T5Tokenizer",
938
- "truncation_side": "right",
939
- "truncation_strategy": "longest_first",
940
  "unk_token": "<unk>"
941
  }
 
930
  "clean_up_tokenization_spaces": true,
931
  "eos_token": "</s>",
932
  "extra_ids": 100,
 
933
  "model_max_length": 512,
934
  "pad_token": "<pad>",
 
935
  "tokenizer_class": "T5Tokenizer",
 
 
936
  "unk_token": "<unk>"
937
  }
train/data-00000-of-00002.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee121506abd0676a5266c0592070a202642789ef9a38fd5876aa403135c44a04
3
+ size 446399184
train/data-00001-of-00002.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cc62e5c0f5a5137d80e124fb9156e0a60274f953ed6b95347cf7c32df324d25
3
+ size 425352424
train/dataset_info.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "csv",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "csv",
6
+ "dataset_size": 472235292,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "/home/ubuntu/wwdrive2/14March/clean_train.csv": {
10
+ "num_bytes": 369392984,
11
+ "checksum": null
12
+ },
13
+ "/home/ubuntu/wwdrive2/14March/clean_eval.csv": {
14
+ "num_bytes": 71786407,
15
+ "checksum": null
16
+ }
17
+ },
18
+ "download_size": 441179391,
19
+ "features": {
20
+ "input_ids": {
21
+ "feature": {
22
+ "dtype": "int32",
23
+ "_type": "Value"
24
+ },
25
+ "_type": "Sequence"
26
+ },
27
+ "attention_mask": {
28
+ "feature": {
29
+ "dtype": "int8",
30
+ "_type": "Value"
31
+ },
32
+ "_type": "Sequence"
33
+ },
34
+ "labels": {
35
+ "feature": {
36
+ "dtype": "int64",
37
+ "_type": "Value"
38
+ },
39
+ "_type": "Sequence"
40
+ }
41
+ },
42
+ "homepage": "",
43
+ "license": "",
44
+ "size_in_bytes": 913414683,
45
+ "splits": {
46
+ "train": {
47
+ "name": "train",
48
+ "num_bytes": 394316099,
49
+ "num_examples": 4507525,
50
+ "dataset_name": "csv"
51
+ },
52
+ "eval": {
53
+ "name": "eval",
54
+ "num_bytes": 77919193,
55
+ "num_examples": 1127410,
56
+ "dataset_name": "csv"
57
+ }
58
+ },
59
+ "version": {
60
+ "version_str": "0.0.0",
61
+ "major": 0,
62
+ "minor": 0,
63
+ "patch": 0
64
+ }
65
+ }
train/state.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00002.arrow"
5
+ },
6
+ {
7
+ "filename": "data-00001-of-00002.arrow"
8
+ }
9
+ ],
10
+ "_fingerprint": "26c810267bb075b4",
11
+ "_format_columns": null,
12
+ "_format_kwargs": {},
13
+ "_format_type": null,
14
+ "_output_all_columns": false,
15
+ "_split": "train"
16
+ }