Michal Pleban committed on
Commit
57840f4
1 Parent(s): c5099d3

Initial commit

README.md CHANGED
@@ -1,3 +1,64 @@
 ---
+ language:
+ - en
+ tags:
+ - text2text-generation
+ - paraphrase-generation
 license: apache-2.0
+ widget:
+ - text: "US to UK: My favorite color is yellow."
 ---
+
+ ### About the model
+
+ The model has been trained on a dataset containing [249525 sentences with US English spelling](https://www.englishvoice.ai/p/us-to-uk/ "249525 sentences with US English spelling"), along with their UK English equivalents.
+
+ The purpose of the model is to rewrite sentences from US English to UK English. It is capable not only of changing the spelling of words (such as "color" to "colour") but also of adjusting the vocabulary appropriately (for example, "subway" to "underground", "lawyer" to "solicitor", and so on).
+
+ ### Generation examples
+
+ | Input | Output |
+ | :------------ | :------------ |
+ | My favorite color is yellow. | My favourite colour is yellow. |
+ | I saw a guy in yellow sneakers at the subway station. | I saw a bloke in yellow trainers at the underground station. |
+ | You could have gotten hurt! | You could have got hurt! |
+
+ ### The dataset
+
+ The dataset was developed by English Voice AI Labs. You can download it from our website:
+ [https://www.EnglishVoice.ai/](https://www.EnglishVoice.ai/ "https://www.EnglishVoice.ai/")
+
+ ### Sample code
+
+ Sample Python code:
+
+ ```python
+ import torch
+ from transformers import T5ForConditionalGeneration, T5Tokenizer
+
+ # Run on GPU when available, otherwise fall back to CPU.
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ model = T5ForConditionalGeneration.from_pretrained("EnglishVoice/t5-base-us-to-uk-english")
+ tokenizer = T5Tokenizer.from_pretrained("EnglishVoice/t5-base-us-to-uk-english")
+ model = model.to(device)
+
+ input_text = "My favorite color is yellow."
+
+ # The model expects the task prefix "US to UK: " in front of the sentence.
+ text = "US to UK: " + input_text
+ encoding = tokenizer.encode_plus(text, return_tensors="pt")
+ input_ids = encoding["input_ids"].to(device)
+ attention_masks = encoding["attention_mask"].to(device)
+ beam_outputs = model.generate(
+     input_ids=input_ids,
+     attention_mask=attention_masks,
+     early_stopping=True,
+ )
+
+ result = tokenizer.decode(beam_outputs[0], skip_special_tokens=True)
+ print(result)
+ ```
+
+ Output:
+
+ ```My favourite colour is yellow.```
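
The same checkpoint and "US to UK: " prefix can also be applied to a whole batch of sentences. The sketch below is not part of the original card; the padding and beam-search settings (`num_beams=4`) are illustrative assumptions rather than values documented for this model.

```python
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = T5ForConditionalGeneration.from_pretrained("EnglishVoice/t5-base-us-to-uk-english").to(device)
tokenizer = T5Tokenizer.from_pretrained("EnglishVoice/t5-base-us-to-uk-english")

sentences = [
    "My favorite color is yellow.",
    "I saw a guy in yellow sneakers at the subway station.",
    "You could have gotten hurt!",
]

# Prepend the task prefix to every sentence and pad to a common length.
batch = tokenizer(
    ["US to UK: " + s for s in sentences],
    return_tensors="pt",
    padding=True,
).to(device)

outputs = model.generate(
    input_ids=batch["input_ids"],
    attention_mask=batch["attention_mask"],
    num_beams=4,          # illustrative beam-search settings, not from the card
    early_stopping=True,
)

for line in tokenizer.batch_decode(outputs, skip_special_tokens=True):
    print(line)
```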
config.json ADDED
@@ -0,0 +1,57 @@
+ {
+   "_name_or_path": "upload",
+   "architectures": [
+     "T5ForConditionalGeneration"
+   ],
+   "d_ff": 3072,
+   "d_kv": 64,
+   "d_model": 768,
+   "decoder_start_token_id": 0,
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "relu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "t5",
+   "n_positions": 512,
+   "num_decoder_layers": 12,
+   "num_heads": 12,
+   "num_layers": 12,
+   "output_past": true,
+   "pad_token_id": 0,
+   "relative_attention_num_buckets": 32,
+   "task_specific_params": {
+     "summarization": {
+       "early_stopping": true,
+       "length_penalty": 2.0,
+       "max_length": 200,
+       "min_length": 30,
+       "no_repeat_ngram_size": 3,
+       "num_beams": 4,
+       "prefix": "summarize: "
+     },
+     "translation_en_to_de": {
+       "early_stopping": true,
+       "max_length": 300,
+       "num_beams": 4,
+       "prefix": "translate English to German: "
+     },
+     "translation_en_to_fr": {
+       "early_stopping": true,
+       "max_length": 300,
+       "num_beams": 4,
+       "prefix": "translate English to French: "
+     },
+     "translation_en_to_ro": {
+       "early_stopping": true,
+       "max_length": 300,
+       "num_beams": 4,
+       "prefix": "translate English to Romanian: "
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.17.0",
+   "use_cache": true,
+   "vocab_size": 32128
+ }
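
The values above correspond to the standard t5-base architecture (12 encoder and decoder layers, 12 attention heads, `d_model` of 768, and a 32128-token vocabulary). As a rough sketch, the configuration can also be inspected programmatically; this assumes only the checkpoint name used in the sample code above.

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained("EnglishVoice/t5-base-us-to-uk-english")

# These values come straight from the config.json shown above.
print(config.model_type)   # t5
print(config.d_model)      # 768
print(config.num_layers)   # 12
print(config.num_heads)    # 12
print(config.vocab_size)   # 32128
```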
flax_model.msgpack ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fd1712d439251f347ecbb7c1545df5385c649b906d5c60a65f4729b0599bedd7
+ size 891625348
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:802a64b11423fcbf2b35a7d7294c772d03c23abdd04af5c915370c154716f023
+ size 891730879
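
The weight files are committed as Git LFS pointers: `oid` is the SHA-256 digest of the real file and `size` is its length in bytes. Below is a minimal sketch for checking a locally downloaded `pytorch_model.bin` against the pointer above; the local path is hypothetical and the check is not something the repository itself provides.

```python
import hashlib
import os

# Expected values copied from the pytorch_model.bin LFS pointer above.
EXPECTED_OID = "802a64b11423fcbf2b35a7d7294c772d03c23abdd04af5c915370c154716f023"
EXPECTED_SIZE = 891730879

def sha256_of(path, chunk_size=1 << 20):
    """Hash the file in chunks so the ~850 MB checkpoint is never fully in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

path = "pytorch_model.bin"  # hypothetical local path to the downloaded weights
print(os.path.getsize(path) == EXPECTED_SIZE)
print(sha256_of(path) == EXPECTED_OID)
```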
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+ size 791656
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f9cc2c1aca8df08625a1c8a7dee8303cf117d5355aa613fa327144753d626551
+ size 892144912
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff