File size: 2,504 Bytes
9fa4f9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Back-end MT engine section.
# Indented with 2 spaces to match every other section of this file.
MTEngine:
  # One of: Marian, OpenNMT, Moses, GoogleTranslate, DeepL, Lucy.
  MTengine: Marian
  # NOTE(review): presumably makes the server launch the engine itself
  # with startCommand -- confirm against the consumer.
  startMTEngine: true
  # The -p value inside the command (8200) is the same as `port` below.
  startCommand: "./marian-server-GPU -m model.bin -v vocab-es.yml vocab-en.yml -p 8200 --n-best  --alignment hard  --normalize 1 --quiet &"
  IP: localhost
  port: 8200
  min_len_factor: 0.5
    
# Server section: port, server type, verbosity, tag/URL/e-mail/number
# handling switches and logging.
MTUOCServer:
  port: 8000
  # One of: MTUOC, Moses, ModernMT, OpenNMT, NMTWizard.
  type: MTUOC
  verbosity_level: 3

  checkistranslatable: true
  restore_tags: true
  fix_xml: true

  # Placeholder codes paired with the URL / e-mail switches.
  URLs: true
  code_URLs: '@URL@'
  EMAILs: true
  code_EMAILs: '@EMAIL@'

  # NOTE(review): Preprocess also defines replaceNUMs / splitNUMs with its
  # own values -- confirm which section the consumer reads.
  replaceNUMs: true

  splitNUMs: false

  min_chars_segment: 2
  translation_selection_strategy: First
  log_file: log.log
  ONMT_url_root: '/translator'
  
# Top-level SRX file name.
# NOTE(review): Preprocess.SRXfile names a different file (segmentSC.srx);
# confirm which of the two keys the consumer actually uses.
SRX_file: 'segment.srx'


# Pre-processing section: languages, tokenizers, segmentation, number
# handling, truecasing, subword models and sentence boundary symbols.
Preprocess:
  sl_lang: es
  tl_lang: en
  tokenize_SL: false
  # One of the MTUOC-tokenizers, or state Moses if you use Moses tokenizers.
  sl_tokenizer: MTUOC_tokenizer_spa.py
  # One of the MTUOC-tokenizers, or state Moses if you use Moses tokenizers.
  tl_tokenizer: MTUOC_tokenizer_eng.py

  segment_input: true
  SRXfile: segmentSC.srx
  SRXlang: Spanish

  replaceNUMs: false
  code_NUMs: '@NUM@'
  splitNUMs: false

  # --- Truecasing ---
  # Truecasing model, or None if not used.
  tcmodel: tc.es
  # One of: None, all, upper.
  truecase: upper

  # --- SentencePiece ---
  sentencepiece: true
  sp_model_SL: spmodel.model
  sp_model_TL: spmodel.model
  sp_splitter: '▁'

  # --- BPE ---
  # True or False: if sentencepiece is set to True, sentencepiece will be used.
  BPE: false
  bpecodes: codes_file
  bpe_joiner: '@@'

  # --- Sentence boundary symbols ---
  bos_annotate: true
  # None or <s> (or other).
  bos_symbol: '<s>'
  eos_annotate: true
  eos_symbol: '</s>'

# HTML entities

unescape_html_input: true
escape_html_input: false

unescape_html_output: true
escape_html_output: false

# 'None' below is the literal string None, not a YAML null; it is quoted
# to make that explicit.
change_input_files: 'None'
change_output_files: 'None'
change_translation_files: 'None'
  
# Google Translate section.
# NOTE(review): the XXxxXXXXX values look like placeholders to be replaced
# with real project data -- confirm.
GoogleTranslate:
  sllang: en
  tllang: es
  # State None if no glossary is used, otherwise the name of the glossary.
  glossary: 'None'
  project_id: XXxxXXXXX
  location: us-central1
  jsonfile: XXxxXXXXX.json
  
# DeepL section.
# NOTE(review): API_key looks like a placeholder; avoid committing a real
# key to version control.
DeepL:
  API_key: XXxxXXXXX
  sllang: EN
  tllang: ES
  # Disabled option; when enabled, one of: html, xml.
  # tag_handling: html
  # One of: default, less, more.
  formality: default
  # One of: "off", "on", "nonewlines". Kept quoted so that off/on are read
  # as strings rather than booleans.
  split_sentences: 'off'
  glossary: 'None'
  
# Lucy section.
# NOTE(review): the XXX.XXX.XXX host in url looks like a placeholder --
# confirm.
Lucy:
  url: 'http://XXX.XXX.XXX:8080/AutoTranslateRS/V1.3/mtrans/exec/'
  TRANSLATION_DIRECTION: SPANISH-CATALAN
  # The three MARK_* values are integers (0).
  MARK_UNKNOWNS: 0
  MARK_ALTERNATIVES: 0
  MARK_COMPOUNDS: 0
  CHARSET: UTF