lfoppiano commited on
Commit
a5baed0
1 Parent(s): b3b6a18

Upload config-docker.yml

Browse files
Files changed (1) hide show
  1. config-docker.yml +170 -0
config-docker.yml ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
version: "0.8.0"

corpusPath: "./resources/dataset/dataseer/corpus"
templatePath: "./resources/dataset/dataseer/crfpp-templates/dataseer.template"
grobidHome: "/opt/grobid/grobid-home"
tmpPath: "/opt/grobid/grobid-home/tmp/"

# path to Pub2TEI repository as available at https://github.com/kermitt2/Pub2TEI
pub2teiPath: "/opt/Pub2TEI/"

gluttonHost: "https://cloud.science-miner.com/glutton"
# NOTE(review): empty value parses as null — presumably means "no port override"
# (service reachable on the default HTTPS port); confirm against the consumer.
gluttonPort:

# entity-fishing server information for performing entity disambiguation
# for https, indicate 443 as port
entityFishingHost: cloud.science-miner.com/nerd
entityFishingPort: 443
#entityFishingHost: localhost
#entityFishingPort: 8090

# if true we use binary classifiers for the contexts, otherwise use a single multi-label classifier
# binary classifiers perform better, but heavier to use
useBinaryContextClassifiers: false

# sequence labeling model (identify data-related sections)
models:

  # model for zones
  - name: "dataseer"
    engine: "wapiti"
    #engine: "delft"
    wapiti:
      # wapiti training parameters, they will be used at training time only
      epsilon: 0.00001
      window: 20
      nbMaxIterations: 2000

  # classifier model, dataset binary (dataset or not dataset in the current sentence)
  - name: "dataseer-binary"
    engine: "delft"
    delft:
      # deep learning parameters
      #architecture: "gru"
      architecture: "bert"
      #embeddings_name: "word2vec"
      transformer: "allenai/scibert_scivocab_cased"

  # identification of the data type (first level hierarchy)
  - name: "dataseer-first"
    engine: "delft"
    delft:
      # deep learning parameters
      #architecture: "gru"
      architecture: "bert"
      #embeddings_name: "word2vec"
      transformer: "allenai/scibert_scivocab_cased"

  # mention context classification (reuse binary for the moment)
  - name: "dataseer-reuse"
    engine: "delft"
    delft:
      # deep learning parameters
      #architecture: "gru"
      architecture: "bert"
      #embeddings_name: "word2vec"
      transformer: "allenai/scibert_scivocab_cased"

  # model for dataset mention recognition
  - name: "datasets"
    #engine: "wapiti"
    engine: "delft"
    wapiti:
      # wapiti training parameters, they will be used at training time only
      epsilon: 0.00001
      window: 20
      nbMaxIterations: 2000
    delft:
      # deep learning parameters
      #architecture: "BidLSTM_CRF"
      architecture: "BERT_CRF"
      #transformer: "allenai/scibert_scivocab_cased"
      # NOTE(review): verify this model id — the published Hugging Face model is
      # "michiyasunaga/LinkBERT-base"; "LinkBERT-basecased" may be a local copy.
      transformer: "michiyasunaga/LinkBERT-basecased"
      #useELMo: true
      #embeddings_name: "glove-840B"
      runtime:
        # parameters used at runtime/prediction
        max_sequence_length: 200
        #max_sequence_length: 300
        batch_size: 20

  - name: "context"
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_used"
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_creation"
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_shared"
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

# Limit the maximum number of requests (0, no limit)
maxParallelRequests: 0

# CORS configuration for the web API service
corsAllowedOrigins: "*"
corsAllowedMethods: "OPTIONS,GET,PUT,POST,DELETE,HEAD"
corsAllowedHeaders: "X-Requested-With,Content-Type,Accept,Origin"

server:
  type: custom
  idleTimeout: 120 seconds
  applicationConnectors:
    - type: http
      port: 8065
  adminConnectors:
    - type: http
      port: 8066
  registerDefaultExceptionMappers: false
  maxThreads: 2048
  maxQueuedRequests: 2048
  acceptQueueSize: 2048
  requestLog:
    appenders: []

# these logging settings apply to the service usage mode
logging:
  level: INFO
  loggers:
    org.apache.pdfbox.pdmodel.font.PDSimpleFont: "OFF"
    org.glassfish.jersey.internal: "OFF"
    com.squarespace.jersey2.guice.JerseyGuiceUtils: "OFF"
  appenders:
    - type: console
      threshold: INFO
      timeZone: UTC
      # uncomment to have the logs in json format
      #layout:
      #  type: json
    # - type: file
    #   currentLogFilename: logs/datastet-service.log
    #   threshold: INFO
    #   archive: true
    #   archivedLogFilenamePattern: logs/datastet-service-%d.log
    #   archivedFileCount: 7
    #   timeZone: UTC
    #   uncomment to have the logs in json format
    #   layout:
    #     type: json