PereLluis13
committed on
Commit
•
2ee9ca3
1
Parent(s):
37f3e35
Update README.md
Browse files
README.md
CHANGED
@@ -146,7 +146,11 @@ def extract_triplets_typed(text):
|
|
146 |
return triplets
|
147 |
|
148 |
# Load model and tokenizer
|
149 |
-
tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large", src_lang="en_XX", tgt_lang="tp_XX")
|
|
|
|
|
|
|
|
|
150 |
model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/mrebel-large")
|
151 |
gen_kwargs = {
|
152 |
"max_length": 256,
|
|
|
146 |
return triplets
|
147 |
|
148 |
# Load model and tokenizer
|
149 |
+
tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large", src_lang="en_XX", tgt_lang="tp_XX")
|
150 |
+
# Here we set English ("en_XX") as the source language. To change the source language, swap the first token of the input for the code of your desired language, or set src_lang to another supported language. For Catalan ("ca_XX") or Greek ("el_EL"), which were not included in mBART pretraining, you need a workaround:
|
151 |
+
# tokenizer._src_lang = "ca_XX"
|
152 |
+
# tokenizer.cur_lang_code_id = tokenizer.convert_tokens_to_ids("ca_XX")
|
153 |
+
# tokenizer.set_src_lang_special_tokens("ca_XX")
|
154 |
model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/mrebel-large")
|
155 |
gen_kwargs = {
|
156 |
"max_length": 256,
|