SantiagoMoreno-UdeA committed on
Commit
42d6a0f
1 Parent(s): 3f1786c

Add files to repo

Browse files
Bash_handler.sh ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# NER software handler
#
# Command-line front end that forwards its options to the project's Python
# scripts.
#
# Usage:
#   Bash_handler.sh TRAIN -id|--inputdir <dir> [-m|--model <model>]
#                         [-f|--fast <value>] [-s|--standard <value>]
#                         [-u|--upsampleflag <value>] [-cu|--cuda <value>]
#   Bash_handler.sh USE   -m|--model <model> -id|--inputdir <dir>
#                         [-od|--outputdir <dir>] [-cu|--cuda <value>]
#
# TRAIN runs src/scripts/Train_model.py and USE runs
# src/scripts/Tagged_document.py. Both paths are relative, so the script has
# to be started from the directory that contains src/.
#
# Exit status: the status of the Python script, or 1 when the arguments are
# rejected here.

# Defaults for the optional flags. MODEL, INPUTDIR and OUTPUTDIR are
# deliberately not initialised here: they keep whatever value they already
# have when no flag overrides them.
STANDARD="False"
FAST="False"
CUDA="False"
UFLAG="False"

# Abort on an option the current mode does not know. Without this branch the
# parsing loops below would never shift past the option and loop forever.
reject_option() {
    echo "unknown option: $1" >&2
    exit 1
}

# Report a trailing flag that has no value. The parsing loops stop when a
# single argument is left, so such a flag is ignored.
warn_leftover() {
    if [ $# -eq 1 ]; then
        echo "ignoring argument without a value: $1" >&2
    fi
}

# TRAIN mode: parse "flag value" pairs and call Train_model.py.
run_train() {
    if [ $# -le 1 ]; then
        echo "Not arguments the script requires at least input directory"
        exit 1
    fi

    while [[ $# -gt 1 ]]; do
        case "$1" in
            -f|--fast)          FAST="$2" ;;
            -m|--model)         MODEL="$2" ;;
            -s|--standard)      STANDARD="$2" ;;
            -id|--inputdir)     INPUTDIR="$2" ;;
            -u|--upsampleflag)  UFLAG="$2" ;;
            -cu|--cuda)         CUDA="$2" ;;
            *)                  reject_option "$1" ;;
        esac
        shift 2 # past argument and value
    done
    warn_leftover "$@"

    if [ -z "${INPUTDIR}" ]; then
        echo "Not arguments the script requires at least input directory"
        exit 1
    fi

    # Build the argument list as an array so values containing spaces stay
    # intact. -m is only forwarded when a model was given; an empty expansion
    # would otherwise make -m consume the following flag.
    local train_args=(-f "${FAST}")
    if [ -n "${MODEL}" ]; then
        train_args+=(-m "${MODEL}")
    fi
    train_args+=(-s "${STANDARD}" -id "${INPUTDIR}" -u "${UFLAG}" -cu "${CUDA}")

    python src/scripts/Train_model.py "${train_args[@]}"
}

# USE mode: parse "flag value" pairs and call Tagged_document.py.
run_use() {
    if [ $# -le 1 ]; then
        echo "Not arguments the script requires at least model and input file"
        exit 1
    fi

    while [[ $# -gt 1 ]]; do
        case "$1" in
            -m|--model)      MODEL="$2" ;;
            -id|--inputdir)  INPUTDIR="$2" ;;
            -od|--outputdir) OUTPUTDIR="$2" ;;
            -cu|--cuda)      CUDA="$2" ;;
            *)               reject_option "$1" ;;
        esac
        shift 2 # past argument and value
    done
    warn_leftover "$@"

    if [ -z "${MODEL}" ] || [ -z "${INPUTDIR}" ]; then
        echo "Not arguments the script requires at least model and input file"
        exit 1
    fi

    # -od and -cu are forwarded only when they hold a value. CUDA defaults to
    # "False", so -cu is omitted only when it was explicitly set to "".
    local use_args=(-m "${MODEL}" -id "${INPUTDIR}")
    if [ -n "${OUTPUTDIR}" ]; then
        use_args+=(-od "${OUTPUTDIR}")
    fi
    if [ -n "${CUDA}" ]; then
        use_args+=(-cu "${CUDA}")
    fi

    python src/scripts/Tagged_document.py "${use_args[@]}"
}

if [ $# -eq 0 ]; then
    echo "invalid option, USE for use a model, TRAIN for train a new model"
    exit 1
fi

MODE="$1"
shift # past the mode

case "${MODE}" in
    TRAIN) run_train "$@" ;;
    USE)   run_use "$@" ;;
    *)
        echo "invalid option, USE for use a model, TRAIN for train a new model"
        exit 1
        ;;
esac
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Image for the NER software: Ubuntu 18.04 with Python 3.9 from the
# deadsnakes PPA and the project sources copied to /workspace.
FROM ubuntu:18.04

# Build-time only: keep apt from asking questions during installation.
ARG DEBIAN_FRONTEND=noninteractive

# System packages and Python 3.9. "apt-get update" and the installs share one
# layer so a cached package index is never combined with a later install.
# Every install carries -y because the build has no terminal to confirm on.
RUN apt-get update \
 && apt-get upgrade -y \
 && apt-get install -y software-properties-common \
 && apt-get install -y --reinstall ca-certificates \
 && add-apt-repository -y ppa:deadsnakes/ppa \
 && apt-get update \
 && apt-get install -y python3.9 python3.9-distutils \
 && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 \
 && update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1 \
 && apt-get install -y python3-pip \
 && rm -rf /var/lib/apt/lists/*

# Packaging tools. Each upgrade stays in its own step so that every step
# starts a fresh shell and looks up pip3 again after pip has been replaced.
RUN pip3 install --upgrade setuptools
RUN pip3 install --upgrade pip
RUN pip3 install --upgrade distlib

WORKDIR /workspace
# COPY is enough here: the build context is copied as-is.
COPY . /workspace/
ENV HOME=/workspace
RUN pip install -r requirements.txt

# NOTE(review): the launcher added in this commit is app.py; confirm that
# execute_GUI.py exists in the build context before relying on this command.
CMD ["python", "execute_GUI.py"]
app.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# -*- coding: utf-8 -*-
"""
Launcher for the graphical interface.

Makes the folder containing this file the working directory, puts its
``src/graph`` sub-folder first on the module search path and, when run as a
script, calls ``execute_GUI``.

Created on Tue Dec 13 17:15:20 2022

@author: gita
"""
import os
import sys

# Absolute folder of this file; everything below is resolved relative to it.
default_path = os.path.dirname(os.path.abspath(__file__))
os.chdir(default_path)
sys.path.insert(0, f"{default_path}/src/graph")

# Kept after the working-directory and search-path setup above.
from src.graph.GUI import execute_GUI  # noqa: E402

if __name__ == "__main__":
    execute_GUI()
data/RC/rel2id.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Product-Producer": 0, "Cause-Effect": 1, "Content-Container": 2, "Component-Whole": 3, "Other": 4, "Entity-Destination": 5, "Instrument-Agency": 6, "Entity-Origin": 7, "Message-Topic": 8, "Member-Collection": 9}
data/RC/test.txt ADDED
@@ -0,0 +1,1037 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The O Component-Whole
2
+ system O -
3
+ as O -
4
+ described O -
5
+ above O -
6
+ has O -
7
+ its O -
8
+ greatest O -
9
+ application O -
10
+ in O -
11
+ an O -
12
+ arrayed O -
13
+ configuration Whole -
14
+ of O -
15
+ antenna O -
16
+ elements Component -
17
+ . O -
18
+
19
+ The O Other
20
+ child Orelation1 -
21
+ was O -
22
+ carefully O -
23
+ wrapped O -
24
+ and O -
25
+ bound O -
26
+ into O -
27
+ the O -
28
+ cradle Orelation2 -
29
+ by O -
30
+ means O -
31
+ of O -
32
+ a O -
33
+ cord O -
34
+ . O -
35
+
36
+ The O Instrument-Agency
37
+ author Agency -
38
+ of O -
39
+ a O -
40
+ keygen O -
41
+ uses O -
42
+ a O -
43
+ disassembler Instrument -
44
+ to O -
45
+ look O -
46
+ at O -
47
+ the O -
48
+ raw O -
49
+ assembly O -
50
+ code O -
51
+ . O -
52
+
53
+ A O Other
54
+ misty O -
55
+ ridge Orelation1 -
56
+ uprises O -
57
+ from O -
58
+ the O -
59
+ surge Orelation2 -
60
+ . O -
61
+
62
+ The O Member-Collection
63
+ student Member -
64
+ association Collection -
65
+ is O -
66
+ the O -
67
+ voice O -
68
+ of O -
69
+ the O -
70
+ undergraduate O -
71
+ student O -
72
+ population O -
73
+ of O -
74
+ the O -
75
+ State O -
76
+ University O -
77
+ of O -
78
+ New O -
79
+ York O -
80
+ at O -
81
+ Buffalo O -
82
+ . O -
83
+
84
+ This O Other
85
+ is O -
86
+ the O -
87
+ sprawling O -
88
+ complex Orelation1 -
89
+ that O -
90
+ is O -
91
+ Peru O -
92
+ 's O -
93
+ largest O -
94
+ producer Orelation2 -
95
+ of O -
96
+ silver O -
97
+ . O -
98
+
99
+ The O Cause-Effect
100
+ current O -
101
+ view O -
102
+ is O -
103
+ that O -
104
+ the O -
105
+ chronic O -
106
+ inflammation Effect -
107
+ in O -
108
+ the O -
109
+ distal O -
110
+ part O -
111
+ of O -
112
+ the O -
113
+ stomach O -
114
+ caused O -
115
+ by O -
116
+ Helicobacter O -
117
+ pylori O -
118
+ infection Cause -
119
+ results O -
120
+ in O -
121
+ an O -
122
+ increased O -
123
+ acid O -
124
+ production O -
125
+ from O -
126
+ the O -
127
+ non-infected O -
128
+ upper O -
129
+ corpus O -
130
+ region O -
131
+ of O -
132
+ the O -
133
+ stomach O -
134
+ . O -
135
+
136
+ People Entity -
137
+ have O -
138
+ been O -
139
+ moving O -
140
+ back O -
141
+ into O -
142
+ downtown Destination -
143
+ . O -
144
+
145
+ The O Content-Container
146
+ lawsonite Content -
147
+ was O -
148
+ contained O -
149
+ in O -
150
+ a O -
151
+ platinum Container -
152
+ crucible Container -
153
+ and O -
154
+ the O -
155
+ counter-weight O -
156
+ was O -
157
+ a O -
158
+ plastic O -
159
+ crucible O -
160
+ with O -
161
+ metal O -
162
+ pieces O -
163
+ . O -
164
+
165
+ The O Entity-Destination
166
+ solute O -
167
+ was O -
168
+ placed O -
169
+ inside O -
170
+ a O -
171
+ beaker O -
172
+ and O -
173
+ 5 O -
174
+ mL O -
175
+ of O -
176
+ the O -
177
+ solvent Entity -
178
+ was O -
179
+ pipetted O -
180
+ into O -
181
+ a O -
182
+ 25 O -
183
+ mL O -
184
+ glass O -
185
+ flask Destination -
186
+ for O -
187
+ each O -
188
+ trial O -
189
+ . O -
190
+
191
+ The O Member-Collection
192
+ fifty O -
193
+ essays Member -
194
+ collected O -
195
+ in O -
196
+ this O -
197
+ volume Collection -
198
+ testify O -
199
+ to O -
200
+ most O -
201
+ of O -
202
+ the O -
203
+ prominent O -
204
+ themes O -
205
+ from O -
206
+ Professor O -
207
+ Quispel O -
208
+ 's O -
209
+ scholarly O -
210
+ career O -
211
+ . O -
212
+
213
+ Their O Other
214
+ composer Orelation1 -
215
+ has O -
216
+ sunk O -
217
+ into O -
218
+ oblivion Orelation2 -
219
+ . O -
220
+
221
+ The O Message-Topic
222
+ Pulitzer O -
223
+ Committee O -
224
+ issues O -
225
+ an O -
226
+ official O -
227
+ citation Message -
228
+ explaining O -
229
+ the O -
230
+ reasons Topic -
231
+ for O -
232
+ the O -
233
+ award O -
234
+ . O -
235
+
236
+ The O Cause-Effect
237
+ burst Effect -
238
+ has O -
239
+ been O -
240
+ caused O -
241
+ by O -
242
+ water O -
243
+ hammer O -
244
+ pressure Cause -
245
+ . O -
246
+
247
+ Even O Instrument-Agency
248
+ commercial O -
249
+ networks Agency -
250
+ have O -
251
+ moved O -
252
+ into O -
253
+ high-definition Instrument -
254
+ broadcast Instrument -
255
+ . O -
256
+
257
+ It O Message-Topic
258
+ was O -
259
+ a O -
260
+ friendly O -
261
+ call Message -
262
+ to O -
263
+ remind O -
264
+ them O -
265
+ about O -
266
+ the O -
267
+ bill Topic -
268
+ and O -
269
+ make O -
270
+ sure O -
271
+ they O -
272
+ have O -
273
+ a O -
274
+ copy O -
275
+ of O -
276
+ the O -
277
+ invoice O -
278
+ . O -
279
+
280
+ Texas-born O Instrument-Agency
281
+ virtuoso Agency -
282
+ finds O -
283
+ harmony O -
284
+ , O -
285
+ sophistication O -
286
+ in O -
287
+ Appalachian O -
288
+ instrument Instrument -
289
+ . O -
290
+
291
+ The O Product-Producer
292
+ factory Producer -
293
+ ' O -
294
+ s O -
295
+ products O -
296
+ have O -
297
+ included O -
298
+ flower O -
299
+ pots O -
300
+ , O -
301
+ Finnish O -
302
+ rooster-whistles O -
303
+ , O -
304
+ pans O -
305
+ , O -
306
+ trays Product -
307
+ , O -
308
+ tea O -
309
+ pots O -
310
+ , O -
311
+ ash O -
312
+ trays O -
313
+ and O -
314
+ air O -
315
+ moisturisers O -
316
+ . O -
317
+
318
+ The O Component-Whole
319
+ girl O -
320
+ showed O -
321
+ a O -
322
+ photo O -
323
+ of O -
324
+ apple O -
325
+ tree Whole -
326
+ blossom Component -
327
+ on O -
328
+ a O -
329
+ fruit O -
330
+ tree O -
331
+ in O -
332
+ the O -
333
+ Central O -
334
+ Valley O -
335
+ . O -
336
+
337
+ They O Member-Collection
338
+ tried O -
339
+ an O -
340
+ assault O -
341
+ of O -
342
+ their O -
343
+ own O -
344
+ an O -
345
+ hour O -
346
+ later O -
347
+ , O -
348
+ with O -
349
+ two O -
350
+ columns O -
351
+ of O -
352
+ sixteen O -
353
+ tanks O -
354
+ backed O -
355
+ by O -
356
+ a O -
357
+ battalion Collection -
358
+ of O -
359
+ Panzer O -
360
+ grenadiers Member -
361
+ . O -
362
+
363
+ Their O Entity-Origin
364
+ knowledge Entity -
365
+ of O -
366
+ the O -
367
+ power O -
368
+ and O -
369
+ rank O -
370
+ symbols O -
371
+ of O -
372
+ the O -
373
+ Continental O -
374
+ empires O -
375
+ was O -
376
+ gained O -
377
+ from O -
378
+ the O -
379
+ numerous O -
380
+ Germanic O -
381
+ recruits Origin -
382
+ in O -
383
+ the O -
384
+ Roman O -
385
+ army O -
386
+ , O -
387
+ and O -
388
+ from O -
389
+ the O -
390
+ Roman O -
391
+ practice O -
392
+ of O -
393
+ enfeoffing O -
394
+ various O -
395
+ Germanic O -
396
+ warrior O -
397
+ groups O -
398
+ with O -
399
+ land O -
400
+ in O -
401
+ the O -
402
+ imperial O -
403
+ provinces O -
404
+ . O -
405
+
406
+ She O Member-Collection
407
+ soon O -
408
+ had O -
409
+ a O -
410
+ stable Collection -
411
+ of O -
412
+ her O -
413
+ own O -
414
+ rescued O -
415
+ hounds Member -
416
+ . O -
417
+
418
+ The O Cause-Effect
419
+ singer Cause -
420
+ , O -
421
+ who O -
422
+ performed O -
423
+ three O -
424
+ of O -
425
+ the O -
426
+ nominated O -
427
+ songs O -
428
+ , O -
429
+ also O -
430
+ caused O -
431
+ a O -
432
+ commotion Effect -
433
+ on O -
434
+ the O -
435
+ red O -
436
+ carpet O -
437
+ . O -
438
+
439
+ His O Other
440
+ intellectually O -
441
+ engaging O -
442
+ books O -
443
+ and O -
444
+ essays Orelation1 -
445
+ remain O -
446
+ pertinent O -
447
+ to O -
448
+ illuminating O -
449
+ contemporary O -
450
+ history Orelation2 -
451
+ . O -
452
+
453
+ Poor O Member-Collection
454
+ hygiene O -
455
+ controls O -
456
+ , O -
457
+ reports O -
458
+ of O -
459
+ a O -
460
+ brace Collection -
461
+ of O -
462
+ gamey O -
463
+ grouse Member -
464
+ and O -
465
+ what O -
466
+ looked O -
467
+ like O -
468
+ a O -
469
+ skinned O -
470
+ fox O -
471
+ all O -
472
+ amounted O -
473
+ to O -
474
+ a O -
475
+ pie O -
476
+ that O -
477
+ was O -
478
+ unfit O -
479
+ for O -
480
+ human O -
481
+ consumption O -
482
+ . O -
483
+
484
+ This O Other
485
+ sweet O -
486
+ dress Orelation1 -
487
+ is O -
488
+ made O -
489
+ with O -
490
+ a O -
491
+ blend Orelation2 -
492
+ of O -
493
+ cotton O -
494
+ and O -
495
+ silk O -
496
+ , O -
497
+ and O -
498
+ the O -
499
+ crochet O -
500
+ flower O -
501
+ necklace O -
502
+ is O -
503
+ the O -
504
+ perfect O -
505
+ accessory O -
506
+ . O -
507
+
508
+ Suicide Cause -
509
+ is O -
510
+ one O -
511
+ of O -
512
+ the O -
513
+ leading O -
514
+ causes O -
515
+ of O -
516
+ death Effect -
517
+ among O -
518
+ pre-adolescents O -
519
+ and O -
520
+ teens O -
521
+ , O -
522
+ and O -
523
+ victims O -
524
+ of O -
525
+ bullying O -
526
+ are O -
527
+ at O -
528
+ an O -
529
+ increased O -
530
+ risk O -
531
+ for O -
532
+ committing O -
533
+ suicide O -
534
+ . O -
535
+
536
+ This O Message-Topic
537
+ article Message -
538
+ gives O -
539
+ details O -
540
+ on O -
541
+ 2004 O -
542
+ in O -
543
+ music Topic -
544
+ in O -
545
+ the O -
546
+ United O -
547
+ Kingdom O -
548
+ , O -
549
+ including O -
550
+ the O -
551
+ official O -
552
+ charts O -
553
+ from O -
554
+ that O -
555
+ year O -
556
+ . O -
557
+
558
+ We O Message-Topic
559
+ have O -
560
+ therefore O -
561
+ taken O -
562
+ the O -
563
+ initiative O -
564
+ to O -
565
+ convene O -
566
+ the O -
567
+ first O -
568
+ international O -
569
+ open O -
570
+ meeting Message -
571
+ dedicated O -
572
+ solely O -
573
+ to O -
574
+ rural Topic -
575
+ history Topic -
576
+ . O -
577
+
578
+ The O Component-Whole
579
+ timer Component -
580
+ of O -
581
+ the O -
582
+ device Whole -
583
+ automatically O -
584
+ eliminates O -
585
+ wasted O -
586
+ " O -
587
+ standby O -
588
+ power O -
589
+ " O -
590
+ consumption O -
591
+ by O -
592
+ automatically O -
593
+ turn O -
594
+ off O -
595
+ electronics O -
596
+ plugged O -
597
+ into O -
598
+ the O -
599
+ " O -
600
+ auto O -
601
+ off O -
602
+ " O -
603
+ outlets O -
604
+ . O -
605
+
606
+ Bob O Message-Topic
607
+ Parks O -
608
+ made O -
609
+ a O -
610
+ similar O -
611
+ offer Topic -
612
+ in O -
613
+ a O -
614
+ phone Message -
615
+ call Message -
616
+ made O -
617
+ earlier O -
618
+ this O -
619
+ week O -
620
+ . O -
621
+
622
+ He O Cause-Effect
623
+ had O -
624
+ chest O -
625
+ pains O -
626
+ and O -
627
+ headaches Effect -
628
+ from O -
629
+ mold Cause -
630
+ in O -
631
+ the O -
632
+ bedrooms O -
633
+ . O -
634
+
635
+ The O Product-Producer
636
+ silver-haired O -
637
+ author O -
638
+ was O -
639
+ not O -
640
+ just O -
641
+ laying O -
642
+ India O -
643
+ 's O -
644
+ politician O -
645
+ saint O -
646
+ to O -
647
+ rest O -
648
+ but O -
649
+ healing O -
650
+ a O -
651
+ generations-old O -
652
+ rift O -
653
+ in O -
654
+ the O -
655
+ family O -
656
+ of O -
657
+ the O -
658
+ country Product -
659
+ ' O -
660
+ s O -
661
+ founding O -
662
+ father Producer -
663
+ . O -
664
+
665
+ It O Entity-Destination
666
+ describes O -
667
+ a O -
668
+ method O -
669
+ for O -
670
+ loading O -
671
+ a O -
672
+ horizontal O -
673
+ stack Entity -
674
+ of O -
675
+ containers O -
676
+ into O -
677
+ a O -
678
+ carton Destination -
679
+ . O -
680
+
681
+ The O Component-Whole
682
+ Foundation O -
683
+ decided O -
684
+ to O -
685
+ repurpose O -
686
+ the O -
687
+ building O -
688
+ in O -
689
+ order O -
690
+ to O -
691
+ reduce O -
692
+ wear O -
693
+ and O -
694
+ tear O -
695
+ on O -
696
+ the O -
697
+ plumbing Component -
698
+ in O -
699
+ the O -
700
+ manor Whole -
701
+ house Whole -
702
+ by O -
703
+ redirecting O -
704
+ visitors O -
705
+ during O -
706
+ restoration O -
707
+ projects O -
708
+ and O -
709
+ beyond O -
710
+ . O -
711
+
712
+ The O Entity-Origin
713
+ technology O -
714
+ is O -
715
+ available O -
716
+ to O -
717
+ produce O -
718
+ and O -
719
+ transmit O -
720
+ electricity Entity -
721
+ economically O -
722
+ from O -
723
+ OTEC O -
724
+ systems Origin -
725
+ . O -
726
+
727
+ The O Other
728
+ Medicare O -
729
+ buy-in O -
730
+ plan Orelation1 -
731
+ ran O -
732
+ into O -
733
+ Senate O -
734
+ resistance Orelation2 -
735
+ . O -
736
+
737
+ The O Component-Whole
738
+ provinces Whole -
739
+ are O -
740
+ divided O -
741
+ into O -
742
+ counties Component -
743
+ ( O -
744
+ shahrestan O -
745
+ ) O -
746
+ , O -
747
+ and O -
748
+ subdivided O -
749
+ into O -
750
+ districts O -
751
+ ( O -
752
+ bakhsh O -
753
+ ) O -
754
+ and O -
755
+ sub-districts O -
756
+ ( O -
757
+ dehestan O -
758
+ ) O -
759
+ . O -
760
+
761
+ Financial O Cause-Effect
762
+ stress Cause -
763
+ is O -
764
+ one O -
765
+ of O -
766
+ the O -
767
+ main O -
768
+ causes O -
769
+ of O -
770
+ divorce Effect -
771
+ . O -
772
+
773
+ Newspapers Agency -
774
+ swap O -
775
+ content O -
776
+ via O -
777
+ widgets O -
778
+ with O -
779
+ the O -
780
+ help O -
781
+ of O -
782
+ the O -
783
+ newsgator O -
784
+ service Instrument -
785
+ . O -
786
+
787
+ The O Cause-Effect
788
+ women Cause -
789
+ that O -
790
+ caused O -
791
+ the O -
792
+ accident Effect -
793
+ was O -
794
+ on O -
795
+ the O -
796
+ cell O -
797
+ phone O -
798
+ and O -
799
+ ran O -
800
+ thru O -
801
+ the O -
802
+ intersection O -
803
+ without O -
804
+ pausing O -
805
+ on O -
806
+ the O -
807
+ median O -
808
+ . O -
809
+
810
+ The O Content-Container
811
+ transmitter Content -
812
+ was O -
813
+ discovered O -
814
+ inside O -
815
+ a O -
816
+ bed O -
817
+ settee O -
818
+ suite Container -
819
+ on O -
820
+ which O -
821
+ he O -
822
+ had O -
823
+ been O -
824
+ sitting O -
825
+ . O -
826
+
827
+ The O Member-Collection
828
+ Kerala O -
829
+ backwaters O -
830
+ are O -
831
+ a O -
832
+ chain Collection -
833
+ of O -
834
+ brackish O -
835
+ lagoons Member -
836
+ and O -
837
+ lakes O -
838
+ lying O -
839
+ parallel O -
840
+ to O -
841
+ the O -
842
+ Arabian O -
843
+ Sea O -
844
+ coast O -
845
+ of O -
846
+ Kerala O -
847
+ state O -
848
+ in O -
849
+ southern O -
850
+ India O -
851
+ . O -
852
+
853
+ A O Entity-Origin
854
+ St. O -
855
+ Paul O -
856
+ College O -
857
+ student Entity -
858
+ was O -
859
+ released O -
860
+ from O -
861
+ jail Origin -
862
+ Wednesday O -
863
+ night O -
864
+ , O -
865
+ after O -
866
+ his O -
867
+ arrest O -
868
+ Tuesday O -
869
+ in O -
870
+ the O -
871
+ alleged O -
872
+ rape O -
873
+ of O -
874
+ another O -
875
+ student O -
876
+ on O -
877
+ campus O -
878
+ . O -
879
+
880
+ Calluses Effect -
881
+ are O -
882
+ caused O -
883
+ by O -
884
+ improperly O -
885
+ fitting O -
886
+ shoes O -
887
+ or O -
888
+ by O -
889
+ a O -
890
+ skin Cause -
891
+ abnormality Cause -
892
+ . O -
893
+
894
+ Adults Agency -
895
+ use O -
896
+ drugs Instrument -
897
+ for O -
898
+ this O -
899
+ purpose O -
900
+ . O -
901
+
902
+ The O Instrument-Agency
903
+ councilor Agency -
904
+ proposed O -
905
+ assessing O -
906
+ infinitival O -
907
+ complements O -
908
+ through O -
909
+ elicitation Instrument -
910
+ . O -
911
+
912
+ As O Cause-Effect
913
+ in O -
914
+ the O -
915
+ popular O -
916
+ movie O -
917
+ " O -
918
+ Deep O -
919
+ Impact O -
920
+ " O -
921
+ , O -
922
+ the O -
923
+ action O -
924
+ of O -
925
+ the O -
926
+ Perseid O -
927
+ meteor Effect -
928
+ shower Effect -
929
+ is O -
930
+ caused O -
931
+ by O -
932
+ a O -
933
+ comet Cause -
934
+ , O -
935
+ in O -
936
+ this O -
937
+ case O -
938
+ periodic O -
939
+ comet O -
940
+ Swift-Tuttle O -
941
+ . O -
942
+
943
+ The O Other
944
+ following O -
945
+ information O -
946
+ appeared O -
947
+ in O -
948
+ the O -
949
+ notes Orelation1 -
950
+ to O -
951
+ consolidated O -
952
+ financial O -
953
+ statements Orelation2 -
954
+ of O -
955
+ some O -
956
+ corporate O -
957
+ annual O -
958
+ reports O -
959
+ . O -
960
+
961
+ HipHop O Other
962
+ appropriates O -
963
+ the O -
964
+ symbols O -
965
+ of O -
966
+ a O -
967
+ consumer O -
968
+ society O -
969
+ : O -
970
+ oversized O -
971
+ diamond Orelation1 -
972
+ colliers Orelation2 -
973
+ are O -
974
+ worn O -
975
+ . O -
976
+
977
+ The O Cause-Effect
978
+ radiation Effect -
979
+ from O -
980
+ the O -
981
+ atomic O -
982
+ bomb Cause -
983
+ explosion Cause -
984
+ is O -
985
+ a O -
986
+ typical O -
987
+ acute O -
988
+ radiation O -
989
+ . O -
990
+
991
+ The O Component-Whole
992
+ ride-on O -
993
+ boat Whole -
994
+ tiller Component -
995
+ was O -
996
+ developed O -
997
+ by O -
998
+ engineers O -
999
+ Arnold O -
1000
+ S. O -
1001
+ Juliano O -
1002
+ and O -
1003
+ Dr. O -
1004
+ Eulito O -
1005
+ U. O -
1006
+ Bautista O -
1007
+ . O -
1008
+
1009
+ A O Cause-Effect
1010
+ neoplastic O -
1011
+ recurrence Effect -
1012
+ arose O -
1013
+ from O -
1014
+ an O -
1015
+ extensive O -
1016
+ radiation Cause -
1017
+ induced O -
1018
+ ulceration O -
1019
+ . O -
1020
+
1021
+ He O Cause-Effect
1022
+ has O -
1023
+ a O -
1024
+ tattoo O -
1025
+ on O -
1026
+ his O -
1027
+ right O -
1028
+ arm O -
1029
+ and O -
1030
+ scars Effect -
1031
+ from O -
1032
+ stitches Cause -
1033
+ on O -
1034
+ his O -
1035
+ right O -
1036
+ elbow O -
1037
+ . O -
data/RC/train.txt ADDED
@@ -0,0 +1,1037 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The O Component-Whole
2
+ system O -
3
+ as O -
4
+ described O -
5
+ above O -
6
+ has O -
7
+ its O -
8
+ greatest O -
9
+ application O -
10
+ in O -
11
+ an O -
12
+ arrayed O -
13
+ configuration Whole -
14
+ of O -
15
+ antenna O -
16
+ elements Component -
17
+ . O -
18
+
19
+ The O Other
20
+ child Orelation1 -
21
+ was O -
22
+ carefully O -
23
+ wrapped O -
24
+ and O -
25
+ bound O -
26
+ into O -
27
+ the O -
28
+ cradle Orelation2 -
29
+ by O -
30
+ means O -
31
+ of O -
32
+ a O -
33
+ cord O -
34
+ . O -
35
+
36
+ The O Instrument-Agency
37
+ author Agency -
38
+ of O -
39
+ a O -
40
+ keygen O -
41
+ uses O -
42
+ a O -
43
+ disassembler Instrument -
44
+ to O -
45
+ look O -
46
+ at O -
47
+ the O -
48
+ raw O -
49
+ assembly O -
50
+ code O -
51
+ . O -
52
+
53
+ A O Other
54
+ misty O -
55
+ ridge Orelation1 -
56
+ uprises O -
57
+ from O -
58
+ the O -
59
+ surge Orelation2 -
60
+ . O -
61
+
62
+ The O Member-Collection
63
+ student Member -
64
+ association Collection -
65
+ is O -
66
+ the O -
67
+ voice O -
68
+ of O -
69
+ the O -
70
+ undergraduate O -
71
+ student O -
72
+ population O -
73
+ of O -
74
+ the O -
75
+ State O -
76
+ University O -
77
+ of O -
78
+ New O -
79
+ York O -
80
+ at O -
81
+ Buffalo O -
82
+ . O -
83
+
84
+ This O Other
85
+ is O -
86
+ the O -
87
+ sprawling O -
88
+ complex Orelation1 -
89
+ that O -
90
+ is O -
91
+ Peru O -
92
+ 's O -
93
+ largest O -
94
+ producer Orelation2 -
95
+ of O -
96
+ silver O -
97
+ . O -
98
+
99
+ The O Cause-Effect
100
+ current O -
101
+ view O -
102
+ is O -
103
+ that O -
104
+ the O -
105
+ chronic O -
106
+ inflammation Effect -
107
+ in O -
108
+ the O -
109
+ distal O -
110
+ part O -
111
+ of O -
112
+ the O -
113
+ stomach O -
114
+ caused O -
115
+ by O -
116
+ Helicobacter O -
117
+ pylori O -
118
+ infection Cause -
119
+ results O -
120
+ in O -
121
+ an O -
122
+ increased O -
123
+ acid O -
124
+ production O -
125
+ from O -
126
+ the O -
127
+ non-infected O -
128
+ upper O -
129
+ corpus O -
130
+ region O -
131
+ of O -
132
+ the O -
133
+ stomach O -
134
+ . O -
135
+
136
+ People Entity -
137
+ have O -
138
+ been O -
139
+ moving O -
140
+ back O -
141
+ into O -
142
+ downtown Destination -
143
+ . O -
144
+
145
+ The O Content-Container
146
+ lawsonite Content -
147
+ was O -
148
+ contained O -
149
+ in O -
150
+ a O -
151
+ platinum Container -
152
+ crucible Container -
153
+ and O -
154
+ the O -
155
+ counter-weight O -
156
+ was O -
157
+ a O -
158
+ plastic O -
159
+ crucible O -
160
+ with O -
161
+ metal O -
162
+ pieces O -
163
+ . O -
164
+
165
+ The O Entity-Destination
166
+ solute O -
167
+ was O -
168
+ placed O -
169
+ inside O -
170
+ a O -
171
+ beaker O -
172
+ and O -
173
+ 5 O -
174
+ mL O -
175
+ of O -
176
+ the O -
177
+ solvent Entity -
178
+ was O -
179
+ pipetted O -
180
+ into O -
181
+ a O -
182
+ 25 O -
183
+ mL O -
184
+ glass O -
185
+ flask Destination -
186
+ for O -
187
+ each O -
188
+ trial O -
189
+ . O -
190
+
191
+ The O Member-Collection
192
+ fifty O -
193
+ essays Member -
194
+ collected O -
195
+ in O -
196
+ this O -
197
+ volume Collection -
198
+ testify O -
199
+ to O -
200
+ most O -
201
+ of O -
202
+ the O -
203
+ prominent O -
204
+ themes O -
205
+ from O -
206
+ Professor O -
207
+ Quispel O -
208
+ 's O -
209
+ scholarly O -
210
+ career O -
211
+ . O -
212
+
213
+ Their O Other
214
+ composer Orelation1 -
215
+ has O -
216
+ sunk O -
217
+ into O -
218
+ oblivion Orelation2 -
219
+ . O -
220
+
221
+ The O Message-Topic
222
+ Pulitzer O -
223
+ Committee O -
224
+ issues O -
225
+ an O -
226
+ official O -
227
+ citation Message -
228
+ explaining O -
229
+ the O -
230
+ reasons Topic -
231
+ for O -
232
+ the O -
233
+ award O -
234
+ . O -
235
+
236
+ The O Cause-Effect
237
+ burst Effect -
238
+ has O -
239
+ been O -
240
+ caused O -
241
+ by O -
242
+ water O -
243
+ hammer O -
244
+ pressure Cause -
245
+ . O -
246
+
247
+ Even O Instrument-Agency
248
+ commercial O -
249
+ networks Agency -
250
+ have O -
251
+ moved O -
252
+ into O -
253
+ high-definition Instrument -
254
+ broadcast Instrument -
255
+ . O -
256
+
257
+ It O Message-Topic
258
+ was O -
259
+ a O -
260
+ friendly O -
261
+ call Message -
262
+ to O -
263
+ remind O -
264
+ them O -
265
+ about O -
266
+ the O -
267
+ bill Topic -
268
+ and O -
269
+ make O -
270
+ sure O -
271
+ they O -
272
+ have O -
273
+ a O -
274
+ copy O -
275
+ of O -
276
+ the O -
277
+ invoice O -
278
+ . O -
279
+
280
+ Texas-born O Instrument-Agency
281
+ virtuoso Agency -
282
+ finds O -
283
+ harmony O -
284
+ , O -
285
+ sophistication O -
286
+ in O -
287
+ Appalachian O -
288
+ instrument Instrument -
289
+ . O -
290
+
291
+ The O Product-Producer
292
+ factory Producer -
293
+ ' O -
294
+ s O -
295
+ products O -
296
+ have O -
297
+ included O -
298
+ flower O -
299
+ pots O -
300
+ , O -
301
+ Finnish O -
302
+ rooster-whistles O -
303
+ , O -
304
+ pans O -
305
+ , O -
306
+ trays Product -
307
+ , O -
308
+ tea O -
309
+ pots O -
310
+ , O -
311
+ ash O -
312
+ trays O -
313
+ and O -
314
+ air O -
315
+ moisturisers O -
316
+ . O -
317
+
318
+ The O Component-Whole
319
+ girl O -
320
+ showed O -
321
+ a O -
322
+ photo O -
323
+ of O -
324
+ apple O -
325
+ tree Whole -
326
+ blossom Component -
327
+ on O -
328
+ a O -
329
+ fruit O -
330
+ tree O -
331
+ in O -
332
+ the O -
333
+ Central O -
334
+ Valley O -
335
+ . O -
336
+
337
+ They O Member-Collection
338
+ tried O -
339
+ an O -
340
+ assault O -
341
+ of O -
342
+ their O -
343
+ own O -
344
+ an O -
345
+ hour O -
346
+ later O -
347
+ , O -
348
+ with O -
349
+ two O -
350
+ columns O -
351
+ of O -
352
+ sixteen O -
353
+ tanks O -
354
+ backed O -
355
+ by O -
356
+ a O -
357
+ battalion Collection -
358
+ of O -
359
+ Panzer O -
360
+ grenadiers Member -
361
+ . O -
362
+
363
+ Their O Entity-Origin
364
+ knowledge Entity -
365
+ of O -
366
+ the O -
367
+ power O -
368
+ and O -
369
+ rank O -
370
+ symbols O -
371
+ of O -
372
+ the O -
373
+ Continental O -
374
+ empires O -
375
+ was O -
376
+ gained O -
377
+ from O -
378
+ the O -
379
+ numerous O -
380
+ Germanic O -
381
+ recruits Origin -
382
+ in O -
383
+ the O -
384
+ Roman O -
385
+ army O -
386
+ , O -
387
+ and O -
388
+ from O -
389
+ the O -
390
+ Roman O -
391
+ practice O -
392
+ of O -
393
+ enfeoffing O -
394
+ various O -
395
+ Germanic O -
396
+ warrior O -
397
+ groups O -
398
+ with O -
399
+ land O -
400
+ in O -
401
+ the O -
402
+ imperial O -
403
+ provinces O -
404
+ . O -
405
+
406
+ She O Member-Collection
407
+ soon O -
408
+ had O -
409
+ a O -
410
+ stable Collection -
411
+ of O -
412
+ her O -
413
+ own O -
414
+ rescued O -
415
+ hounds Member -
416
+ . O -
417
+
418
+ The O Cause-Effect
419
+ singer Cause -
420
+ , O -
421
+ who O -
422
+ performed O -
423
+ three O -
424
+ of O -
425
+ the O -
426
+ nominated O -
427
+ songs O -
428
+ , O -
429
+ also O -
430
+ caused O -
431
+ a O -
432
+ commotion Effect -
433
+ on O -
434
+ the O -
435
+ red O -
436
+ carpet O -
437
+ . O -
438
+
439
+ His O Other
440
+ intellectually O -
441
+ engaging O -
442
+ books O -
443
+ and O -
444
+ essays Orelation1 -
445
+ remain O -
446
+ pertinent O -
447
+ to O -
448
+ illuminating O -
449
+ contemporary O -
450
+ history Orelation2 -
451
+ . O -
452
+
453
+ Poor O Member-Collection
454
+ hygiene O -
455
+ controls O -
456
+ , O -
457
+ reports O -
458
+ of O -
459
+ a O -
460
+ brace Collection -
461
+ of O -
462
+ gamey O -
463
+ grouse Member -
464
+ and O -
465
+ what O -
466
+ looked O -
467
+ like O -
468
+ a O -
469
+ skinned O -
470
+ fox O -
471
+ all O -
472
+ amounted O -
473
+ to O -
474
+ a O -
475
+ pie O -
476
+ that O -
477
+ was O -
478
+ unfit O -
479
+ for O -
480
+ human O -
481
+ consumption O -
482
+ . O -
483
+
484
+ This O Other
485
+ sweet O -
486
+ dress Orelation1 -
487
+ is O -
488
+ made O -
489
+ with O -
490
+ a O -
491
+ blend Orelation2 -
492
+ of O -
493
+ cotton O -
494
+ and O -
495
+ silk O -
496
+ , O -
497
+ and O -
498
+ the O -
499
+ crochet O -
500
+ flower O -
501
+ necklace O -
502
+ is O -
503
+ the O -
504
+ perfect O -
505
+ accessory O -
506
+ . O -
507
+
508
+ Suicide Cause -
509
+ is O -
510
+ one O -
511
+ of O -
512
+ the O -
513
+ leading O -
514
+ causes O -
515
+ of O -
516
+ death Effect -
517
+ among O -
518
+ pre-adolescents O -
519
+ and O -
520
+ teens O -
521
+ , O -
522
+ and O -
523
+ victims O -
524
+ of O -
525
+ bullying O -
526
+ are O -
527
+ at O -
528
+ an O -
529
+ increased O -
530
+ risk O -
531
+ for O -
532
+ committing O -
533
+ suicide O -
534
+ . O -
535
+
536
+ This O Message-Topic
537
+ article Message -
538
+ gives O -
539
+ details O -
540
+ on O -
541
+ 2004 O -
542
+ in O -
543
+ music Topic -
544
+ in O -
545
+ the O -
546
+ United O -
547
+ Kingdom O -
548
+ , O -
549
+ including O -
550
+ the O -
551
+ official O -
552
+ charts O -
553
+ from O -
554
+ that O -
555
+ year O -
556
+ . O -
557
+
558
+ We O Message-Topic
559
+ have O -
560
+ therefore O -
561
+ taken O -
562
+ the O -
563
+ initiative O -
564
+ to O -
565
+ convene O -
566
+ the O -
567
+ first O -
568
+ international O -
569
+ open O -
570
+ meeting Message -
571
+ dedicated O -
572
+ solely O -
573
+ to O -
574
+ rural Topic -
575
+ history Topic -
576
+ . O -
577
+
578
+ The O Component-Whole
579
+ timer Component -
580
+ of O -
581
+ the O -
582
+ device Whole -
583
+ automatically O -
584
+ eliminates O -
585
+ wasted O -
586
+ " O -
587
+ standby O -
588
+ power O -
589
+ " O -
590
+ consumption O -
591
+ by O -
592
+ automatically O -
593
+ turn O -
594
+ off O -
595
+ electronics O -
596
+ plugged O -
597
+ into O -
598
+ the O -
599
+ " O -
600
+ auto O -
601
+ off O -
602
+ " O -
603
+ outlets O -
604
+ . O -
605
+
606
+ Bob O Message-Topic
607
+ Parks O -
608
+ made O -
609
+ a O -
610
+ similar O -
611
+ offer Topic -
612
+ in O -
613
+ a O -
614
+ phone Message -
615
+ call Message -
616
+ made O -
617
+ earlier O -
618
+ this O -
619
+ week O -
620
+ . O -
621
+
622
+ He O Cause-Effect
623
+ had O -
624
+ chest O -
625
+ pains O -
626
+ and O -
627
+ headaches Effect -
628
+ from O -
629
+ mold Cause -
630
+ in O -
631
+ the O -
632
+ bedrooms O -
633
+ . O -
634
+
635
+ The O Product-Producer
636
+ silver-haired O -
637
+ author O -
638
+ was O -
639
+ not O -
640
+ just O -
641
+ laying O -
642
+ India O -
643
+ 's O -
644
+ politician O -
645
+ saint O -
646
+ to O -
647
+ rest O -
648
+ but O -
649
+ healing O -
650
+ a O -
651
+ generations-old O -
652
+ rift O -
653
+ in O -
654
+ the O -
655
+ family O -
656
+ of O -
657
+ the O -
658
+ country Product -
659
+ ' O -
660
+ s O -
661
+ founding O -
662
+ father Producer -
663
+ . O -
664
+
665
+ It O Entity-Destination
666
+ describes O -
667
+ a O -
668
+ method O -
669
+ for O -
670
+ loading O -
671
+ a O -
672
+ horizontal O -
673
+ stack Entity -
674
+ of O -
675
+ containers O -
676
+ into O -
677
+ a O -
678
+ carton Destination -
679
+ . O -
680
+
681
+ The O Component-Whole
682
+ Foundation O -
683
+ decided O -
684
+ to O -
685
+ repurpose O -
686
+ the O -
687
+ building O -
688
+ in O -
689
+ order O -
690
+ to O -
691
+ reduce O -
692
+ wear O -
693
+ and O -
694
+ tear O -
695
+ on O -
696
+ the O -
697
+ plumbing Component -
698
+ in O -
699
+ the O -
700
+ manor Whole -
701
+ house Whole -
702
+ by O -
703
+ redirecting O -
704
+ visitors O -
705
+ during O -
706
+ restoration O -
707
+ projects O -
708
+ and O -
709
+ beyond O -
710
+ . O -
711
+
712
+ The O Entity-Origin
713
+ technology O -
714
+ is O -
715
+ available O -
716
+ to O -
717
+ produce O -
718
+ and O -
719
+ transmit O -
720
+ electricity Entity -
721
+ economically O -
722
+ from O -
723
+ OTEC O -
724
+ systems Origin -
725
+ . O -
726
+
727
+ The O Other
728
+ Medicare O -
729
+ buy-in O -
730
+ plan Orelation1 -
731
+ ran O -
732
+ into O -
733
+ Senate O -
734
+ resistance Orelation2 -
735
+ . O -
736
+
737
+ The O Component-Whole
738
+ provinces Whole -
739
+ are O -
740
+ divided O -
741
+ into O -
742
+ counties Component -
743
+ ( O -
744
+ shahrestan O -
745
+ ) O -
746
+ , O -
747
+ and O -
748
+ subdivided O -
749
+ into O -
750
+ districts O -
751
+ ( O -
752
+ bakhsh O -
753
+ ) O -
754
+ and O -
755
+ sub-districts O -
756
+ ( O -
757
+ dehestan O -
758
+ ) O -
759
+ . O -
760
+
761
+ Financial O Cause-Effect
762
+ stress Cause -
763
+ is O -
764
+ one O -
765
+ of O -
766
+ the O -
767
+ main O -
768
+ causes O -
769
+ of O -
770
+ divorce Effect -
771
+ . O -
772
+
773
+ Newspapers Agency -
774
+ swap O -
775
+ content O -
776
+ via O -
777
+ widgets O -
778
+ with O -
779
+ the O -
780
+ help O -
781
+ of O -
782
+ the O -
783
+ newsgator O -
784
+ service Instrument -
785
+ . O -
786
+
787
+ The O Cause-Effect
788
+ women Cause -
789
+ that O -
790
+ caused O -
791
+ the O -
792
+ accident Effect -
793
+ was O -
794
+ on O -
795
+ the O -
796
+ cell O -
797
+ phone O -
798
+ and O -
799
+ ran O -
800
+ thru O -
801
+ the O -
802
+ intersection O -
803
+ without O -
804
+ pausing O -
805
+ on O -
806
+ the O -
807
+ median O -
808
+ . O -
809
+
810
+ The O Content-Container
811
+ transmitter Content -
812
+ was O -
813
+ discovered O -
814
+ inside O -
815
+ a O -
816
+ bed O -
817
+ settee O -
818
+ suite Container -
819
+ on O -
820
+ which O -
821
+ he O -
822
+ had O -
823
+ been O -
824
+ sitting O -
825
+ . O -
826
+
827
+ The O Member-Collection
828
+ Kerala O -
829
+ backwaters O -
830
+ are O -
831
+ a O -
832
+ chain Collection -
833
+ of O -
834
+ brackish O -
835
+ lagoons Member -
836
+ and O -
837
+ lakes O -
838
+ lying O -
839
+ parallel O -
840
+ to O -
841
+ the O -
842
+ Arabian O -
843
+ Sea O -
844
+ coast O -
845
+ of O -
846
+ Kerala O -
847
+ state O -
848
+ in O -
849
+ southern O -
850
+ India O -
851
+ . O -
852
+
853
+ A O Entity-Origin
854
+ St. O -
855
+ Paul O -
856
+ College O -
857
+ student Entity -
858
+ was O -
859
+ released O -
860
+ from O -
861
+ jail Origin -
862
+ Wednesday O -
863
+ night O -
864
+ , O -
865
+ after O -
866
+ his O -
867
+ arrest O -
868
+ Tuesday O -
869
+ in O -
870
+ the O -
871
+ alleged O -
872
+ rape O -
873
+ of O -
874
+ another O -
875
+ student O -
876
+ on O -
877
+ campus O -
878
+ . O -
879
+
880
+ Calluses Effect -
881
+ are O -
882
+ caused O -
883
+ by O -
884
+ improperly O -
885
+ fitting O -
886
+ shoes O -
887
+ or O -
888
+ by O -
889
+ a O -
890
+ skin Cause -
891
+ abnormality Cause -
892
+ . O -
893
+
894
+ Adults Agency -
895
+ use O -
896
+ drugs Instrument -
897
+ for O -
898
+ this O -
899
+ purpose O -
900
+ . O -
901
+
902
+ The O Instrument-Agency
903
+ councilor Agency -
904
+ proposed O -
905
+ assessing O -
906
+ infinitival O -
907
+ complements O -
908
+ through O -
909
+ elicitation Instrument -
910
+ . O -
911
+
912
+ As O Cause-Effect
913
+ in O -
914
+ the O -
915
+ popular O -
916
+ movie O -
917
+ " O -
918
+ Deep O -
919
+ Impact O -
920
+ " O -
921
+ , O -
922
+ the O -
923
+ action O -
924
+ of O -
925
+ the O -
926
+ Perseid O -
927
+ meteor Effect -
928
+ shower Effect -
929
+ is O -
930
+ caused O -
931
+ by O -
932
+ a O -
933
+ comet Cause -
934
+ , O -
935
+ in O -
936
+ this O -
937
+ case O -
938
+ periodic O -
939
+ comet O -
940
+ Swift-Tuttle O -
941
+ . O -
942
+
943
+ The O Other
944
+ following O -
945
+ information O -
946
+ appeared O -
947
+ in O -
948
+ the O -
949
+ notes Orelation1 -
950
+ to O -
951
+ consolidated O -
952
+ financial O -
953
+ statements Orelation2 -
954
+ of O -
955
+ some O -
956
+ corporate O -
957
+ annual O -
958
+ reports O -
959
+ . O -
960
+
961
+ HipHop O Other
962
+ appropriates O -
963
+ the O -
964
+ symbols O -
965
+ of O -
966
+ a O -
967
+ consumer O -
968
+ society O -
969
+ : O -
970
+ oversized O -
971
+ diamond Orelation1 -
972
+ colliers Orelation2 -
973
+ are O -
974
+ worn O -
975
+ . O -
976
+
977
+ The O Cause-Effect
978
+ radiation Effect -
979
+ from O -
980
+ the O -
981
+ atomic O -
982
+ bomb Cause -
983
+ explosion Cause -
984
+ is O -
985
+ a O -
986
+ typical O -
987
+ acute O -
988
+ radiation O -
989
+ . O -
990
+
991
+ The O Component-Whole
992
+ ride-on O -
993
+ boat Whole -
994
+ tiller Component -
995
+ was O -
996
+ developed O -
997
+ by O -
998
+ engineers O -
999
+ Arnold O -
1000
+ S. O -
1001
+ Juliano O -
1002
+ and O -
1003
+ Dr. O -
1004
+ Eulito O -
1005
+ U. O -
1006
+ Bautista O -
1007
+ . O -
1008
+
1009
+ A O Cause-Effect
1010
+ neoplastic O -
1011
+ recurrence Effect -
1012
+ arose O -
1013
+ from O -
1014
+ an O -
1015
+ extensive O -
1016
+ radiation Cause -
1017
+ induced O -
1018
+ ulceration O -
1019
+ . O -
1020
+
1021
+ He O Cause-Effect
1022
+ has O -
1023
+ a O -
1024
+ tattoo O -
1025
+ on O -
1026
+ his O -
1027
+ right O -
1028
+ arm O -
1029
+ and O -
1030
+ scars Effect -
1031
+ from O -
1032
+ stitches Cause -
1033
+ on O -
1034
+ his O -
1035
+ right O -
1036
+ elbow O -
1037
+ . O -
data/RC/train_f.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/train/test.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/train/train.txt ADDED
The diff for this file is too large to render. See raw diff
 
fast/07fc1ce0-86ac-11e8-91d7-a1ac35ac08c4-0.json ADDED
The diff for this file is too large to render. See raw diff
 
fast/3cb4fa20-89cb-11e8-a485-d149999fe64b-0.json ADDED
@@ -0,0 +1,2961 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "id" : "3cb4fa20-89cb-11e8-a485-d149999fe64b-0",
3
+ "name" : "180717-194459-Certificados Camara y Comercio.txt",
4
+ "createdDate" : 1531836662466,
5
+ "version" : 3,
6
+ "text" : "CAMARA DE COMERCIO DE BOGOTA SUPERCADE AMERICAS 21 DE NOVIEMBRE DE 2011 CAMARA DE COMEROOO DE BOGOTA CERTIFICADO DE MATRICULA DE PERSONA NATURAL LA CAMARA DE COMERCIO DE BOGOTA, CON FUNDAMENTO EN LAS MATRICULAS E INSCRIPCIONES DEL REGISTRO MERCANTIL CERTIEICA: NOMBRE RODRIGUEZ MURCIA JULIO C.C 194459 NIT: 194459-9 CERTIFICA: MATRICULA NO 00784836 DEL 23 DE ABRIL DE 1997 CERTIEICA DIRECCION DE NO IFICACION JUDICIAL : CL 54 SUR No 87G-02 MUNICIPIO BOGOTA D.C. EMAIL NOTIFICACİON JUDICIAL : CARPINTERIAMETALICAS RODRIGUEZ@HOTMAIL.CO DIRECCION COMERCIAL DG 54 NO 87G-12 MUNICIPIO : BOGOTA D.C EMAIL COMERCIAL: CARPINTERIAMETALICASRODRIGUEZCHOTMAIL.CO *ADVERTENCIA ESTOS DATOS CORRESPONDEN A LA ULTIMA INFORMACION ** SUMINISTRADA POR EL COMERCIANTE EN EL FORMULARIO DE MATRICULA* Y/O RENOVACION DEL AÑO 2010 CERTIEICA: QUE EL COMERCIANTE NO HA CUMPLIDO CON LA OBLIGACION LEGAL DE RENOVAR SU MATRICULA MERCANTIL DESDE 2011 CERTIFICA RENOVACION DE LA MATRÍCULA : EL 15 DE JUNIO DE 2010 ULTIMO AÑO RENOVADO2010 TOTAL ACTIVOS TOTAL ACTIVOS SIN AJUSTES POR INFLACION 95,000,000 ACTIVIDAD ECONOMICA FABRICACION ELEMENTOS METALICOS, ORNAMENTACION PUERTAS, VENTANAS, REJAS Y VERJAS / ALQUILER DE VEHICULOS DE CARGA CON CONDUCTOR : 120,000,000 CERTIFICA: PROPIETARIO DE LOS SIGUIENTES ESTABLECIMIENTOS DE Comercio NOMBRE CARPINTERIA METALICA RODRIGUEZ DIRECCION COMERCIAL : DG 54 SUR No 87G-02 MUNICIPIO : BOGOTA D.C MATRICULA NO 00784839 DE 23 DE ABRIL DE 1997 RENOVACION DE LA MATRICULA EL 15 DE JUNIO DE 2010 ULTIMO AÑO RENOVADO 2010 CERTIFICA LA INFORMACION ANTERIOR HA SIDO TOMADA DIRECTAMENTE DEL FORMULARIO DE MATRICULA DILIGENCIADO POR EL COMERCIANTE DE CONFORMIDAD CON LO ESTABLECIDO POR LA LEY 962 DE 2005, LOS ACTOS DE REGISTRO-AQUI-CERTIFICADOS-'QUEDAN EN-FIRME, CINCO-H DIAS-HABI DESPUES DE LA FECHA DE INSCRIPCION, SIEMPRE QUE NO SEAN OBJETO DE RECURSOS EN LA VIA GUBERNATĪVA **EL PRESENTE CERTIFICADO NO CONSTITUYE PERMISO DE FUNCIONAMIENTO EN NINGUN CASO SEÑOR EMPRESARIO, SI SU 
EMPRESA TIENE ACTIVOS INFERIORES A 30.000 SMLMV Y UNA PLANTA DE PERSONAL DE MENOS DE 200 TRABAJADORES, USTED TIENE DERECHO A RECIBIR UN DESCUENTO EN EL PAGO DE LOS PARAFISCALES DE 75% EN EL PRIMER AÑO DE CONSTITUCION DE SU EMPRESA, DE 50% EN EL SEGUNDO AÑO Y DE 25% EN EL TERCER AÑO. LEY 590 DE 2000 Y DECRETO 525 DE 2009. EL SECRETARIO DE LA CAMARA DE COMERCIO, VALOR $ 1,900 DE CONFORMIDAD CON EL DECRETO 2150 DE 1995 Y LA AUTORIZACION IMPARTIDA POR LA SUPERINTENDENCIA DE INDUSTRIA Y COMERCIO, MEDIANTE EL OFICIO DEL 18 DE NO IEMBRE DE 1996, LA FIRMA MECANICA QUE APARECE A CONTINUACION TIENE PLENA VALIDEZ PARA TODOS LOS EFECTOS LEGALES \"\r\n",
7
+ "docLength" : 0,
8
+ "language" : "ES",
9
+ "status" : "READY",
10
+ "modifiedDate" : 1533915143944,
11
+ "documentSet" : [ ],
12
+ "preannotation" : [ ],
13
+ "sentences" : [ {
14
+ "id" : "s0",
15
+ "begin" : 0,
16
+ "end" : 961,
17
+ "text" : "CAMARA DE COMERCIO DE BOGOTA SUPERCADE AMERICAS 21 DE NOVIEMBRE DE 2011 CAMARA DE COMEROOO DE BOGOTA CERTIFICADO DE MATRICULA DE PERSONA NATURAL LA CAMARA DE COMERCIO DE BOGOTA, CON FUNDAMENTO EN LAS MATRICULAS E INSCRIPCIONES DEL REGISTRO MERCANTIL CERTIEICA: NOMBRE RODRIGUEZ MURCIA JULIO C.C 194459 NIT: 194459-9 CERTIFICA: MATRICULA NO 00784836 DEL 23 DE ABRIL DE 1997 CERTIEICA DIRECCION DE NO IFICACION JUDICIAL : CL 54 SUR No 87G-02 MUNICIPIO BOGOTA D.C. EMAIL NOTIFICACİON JUDICIAL : CARPINTERIAMETALICAS RODRIGUEZ@HOTMAIL.CO DIRECCION COMERCIAL DG 54 NO 87G-12 MUNICIPIO : BOGOTA D.C EMAIL COMERCIAL: CARPINTERIAMETALICASRODRIGUEZCHOTMAIL.CO *ADVERTENCIA ESTOS DATOS CORRESPONDEN A LA ULTIMA INFORMACION ** SUMINISTRADA POR EL COMERCIANTE EN EL FORMULARIO DE MATRICULA* Y/O RENOVACION DEL AÑO 2010 CERTIEICA: QUE EL COMERCIANTE NO HA CUMPLIDO CON LA OBLIGACION LEGAL DE RENOVAR SU MATRICULA MERCANTIL DESDE 2011 CERTIFICA RENOVACION DE LA MATRÍCULA :",
18
+ "tokens" : [ {
19
+ "id" : "s0-t0",
20
+ "begin" : 0,
21
+ "end" : 6,
22
+ "text" : "CAMARA",
23
+ "whiteSpace" : false
24
+ }, {
25
+ "id" : "s0-t1",
26
+ "begin" : 7,
27
+ "end" : 9,
28
+ "text" : "DE",
29
+ "whiteSpace" : false
30
+ }, {
31
+ "id" : "s0-t2",
32
+ "begin" : 10,
33
+ "end" : 18,
34
+ "text" : "COMERCIO",
35
+ "whiteSpace" : false
36
+ }, {
37
+ "id" : "s0-t3",
38
+ "begin" : 19,
39
+ "end" : 21,
40
+ "text" : "DE",
41
+ "whiteSpace" : false
42
+ }, {
43
+ "id" : "s0-t4",
44
+ "begin" : 22,
45
+ "end" : 28,
46
+ "text" : "BOGOTA",
47
+ "whiteSpace" : false
48
+ }, {
49
+ "id" : "s0-t5",
50
+ "begin" : 29,
51
+ "end" : 38,
52
+ "text" : "SUPERCADE",
53
+ "whiteSpace" : false
54
+ }, {
55
+ "id" : "s0-t6",
56
+ "begin" : 39,
57
+ "end" : 47,
58
+ "text" : "AMERICAS",
59
+ "whiteSpace" : false
60
+ }, {
61
+ "id" : "s0-t7",
62
+ "begin" : 48,
63
+ "end" : 50,
64
+ "text" : "21",
65
+ "whiteSpace" : false
66
+ }, {
67
+ "id" : "s0-t8",
68
+ "begin" : 51,
69
+ "end" : 53,
70
+ "text" : "DE",
71
+ "whiteSpace" : false
72
+ }, {
73
+ "id" : "s0-t9",
74
+ "begin" : 54,
75
+ "end" : 63,
76
+ "text" : "NOVIEMBRE",
77
+ "whiteSpace" : false
78
+ }, {
79
+ "id" : "s0-t10",
80
+ "begin" : 64,
81
+ "end" : 66,
82
+ "text" : "DE",
83
+ "whiteSpace" : false
84
+ }, {
85
+ "id" : "s0-t11",
86
+ "begin" : 67,
87
+ "end" : 71,
88
+ "text" : "2011",
89
+ "whiteSpace" : false
90
+ }, {
91
+ "id" : "s0-t12",
92
+ "begin" : 72,
93
+ "end" : 78,
94
+ "text" : "CAMARA",
95
+ "whiteSpace" : false
96
+ }, {
97
+ "id" : "s0-t13",
98
+ "begin" : 79,
99
+ "end" : 81,
100
+ "text" : "DE",
101
+ "whiteSpace" : false
102
+ }, {
103
+ "id" : "s0-t14",
104
+ "begin" : 82,
105
+ "end" : 90,
106
+ "text" : "COMEROOO",
107
+ "whiteSpace" : false
108
+ }, {
109
+ "id" : "s0-t15",
110
+ "begin" : 91,
111
+ "end" : 93,
112
+ "text" : "DE",
113
+ "whiteSpace" : false
114
+ }, {
115
+ "id" : "s0-t16",
116
+ "begin" : 94,
117
+ "end" : 100,
118
+ "text" : "BOGOTA",
119
+ "whiteSpace" : false
120
+ }, {
121
+ "id" : "s0-t17",
122
+ "begin" : 101,
123
+ "end" : 112,
124
+ "text" : "CERTIFICADO",
125
+ "whiteSpace" : false
126
+ }, {
127
+ "id" : "s0-t18",
128
+ "begin" : 113,
129
+ "end" : 115,
130
+ "text" : "DE",
131
+ "whiteSpace" : false
132
+ }, {
133
+ "id" : "s0-t19",
134
+ "begin" : 116,
135
+ "end" : 125,
136
+ "text" : "MATRICULA",
137
+ "whiteSpace" : false
138
+ }, {
139
+ "id" : "s0-t20",
140
+ "begin" : 126,
141
+ "end" : 128,
142
+ "text" : "DE",
143
+ "whiteSpace" : false
144
+ }, {
145
+ "id" : "s0-t21",
146
+ "begin" : 129,
147
+ "end" : 136,
148
+ "text" : "PERSONA",
149
+ "whiteSpace" : false
150
+ }, {
151
+ "id" : "s0-t22",
152
+ "begin" : 137,
153
+ "end" : 144,
154
+ "text" : "NATURAL",
155
+ "whiteSpace" : false
156
+ }, {
157
+ "id" : "s0-t23",
158
+ "begin" : 145,
159
+ "end" : 147,
160
+ "text" : "LA",
161
+ "whiteSpace" : false
162
+ }, {
163
+ "id" : "s0-t24",
164
+ "begin" : 148,
165
+ "end" : 154,
166
+ "text" : "CAMARA",
167
+ "whiteSpace" : false
168
+ }, {
169
+ "id" : "s0-t25",
170
+ "begin" : 155,
171
+ "end" : 157,
172
+ "text" : "DE",
173
+ "whiteSpace" : false
174
+ }, {
175
+ "id" : "s0-t26",
176
+ "begin" : 158,
177
+ "end" : 166,
178
+ "text" : "COMERCIO",
179
+ "whiteSpace" : false
180
+ }, {
181
+ "id" : "s0-t27",
182
+ "begin" : 167,
183
+ "end" : 169,
184
+ "text" : "DE",
185
+ "whiteSpace" : false
186
+ }, {
187
+ "id" : "s0-t28",
188
+ "begin" : 170,
189
+ "end" : 176,
190
+ "text" : "BOGOTA",
191
+ "whiteSpace" : false
192
+ }, {
193
+ "id" : "s0-t29",
194
+ "begin" : 176,
195
+ "end" : 177,
196
+ "text" : ",",
197
+ "whiteSpace" : false
198
+ }, {
199
+ "id" : "s0-t30",
200
+ "begin" : 178,
201
+ "end" : 181,
202
+ "text" : "CON",
203
+ "whiteSpace" : false
204
+ }, {
205
+ "id" : "s0-t31",
206
+ "begin" : 182,
207
+ "end" : 192,
208
+ "text" : "FUNDAMENTO",
209
+ "whiteSpace" : false
210
+ }, {
211
+ "id" : "s0-t32",
212
+ "begin" : 193,
213
+ "end" : 195,
214
+ "text" : "EN",
215
+ "whiteSpace" : false
216
+ }, {
217
+ "id" : "s0-t33",
218
+ "begin" : 196,
219
+ "end" : 199,
220
+ "text" : "LAS",
221
+ "whiteSpace" : false
222
+ }, {
223
+ "id" : "s0-t34",
224
+ "begin" : 200,
225
+ "end" : 210,
226
+ "text" : "MATRICULAS",
227
+ "whiteSpace" : false
228
+ }, {
229
+ "id" : "s0-t35",
230
+ "begin" : 211,
231
+ "end" : 212,
232
+ "text" : "E",
233
+ "whiteSpace" : false
234
+ }, {
235
+ "id" : "s0-t36",
236
+ "begin" : 213,
237
+ "end" : 226,
238
+ "text" : "INSCRIPCIONES",
239
+ "whiteSpace" : false
240
+ }, {
241
+ "id" : "s0-t37",
242
+ "begin" : 227,
243
+ "end" : 230,
244
+ "text" : "DEL",
245
+ "whiteSpace" : false
246
+ }, {
247
+ "id" : "s0-t38",
248
+ "begin" : 231,
249
+ "end" : 239,
250
+ "text" : "REGISTRO",
251
+ "whiteSpace" : false
252
+ }, {
253
+ "id" : "s0-t39",
254
+ "begin" : 240,
255
+ "end" : 249,
256
+ "text" : "MERCANTIL",
257
+ "whiteSpace" : false
258
+ }, {
259
+ "id" : "s0-t40",
260
+ "begin" : 250,
261
+ "end" : 259,
262
+ "text" : "CERTIEICA",
263
+ "whiteSpace" : false
264
+ }, {
265
+ "id" : "s0-t41",
266
+ "begin" : 259,
267
+ "end" : 260,
268
+ "text" : ":",
269
+ "whiteSpace" : false
270
+ }, {
271
+ "id" : "s0-t42",
272
+ "begin" : 261,
273
+ "end" : 267,
274
+ "text" : "NOMBRE",
275
+ "whiteSpace" : false
276
+ }, {
277
+ "id" : "s0-t43",
278
+ "begin" : 268,
279
+ "end" : 277,
280
+ "text" : "RODRIGUEZ",
281
+ "whiteSpace" : false
282
+ }, {
283
+ "id" : "s0-t44",
284
+ "begin" : 278,
285
+ "end" : 284,
286
+ "text" : "MURCIA",
287
+ "whiteSpace" : false
288
+ }, {
289
+ "id" : "s0-t45",
290
+ "begin" : 285,
291
+ "end" : 290,
292
+ "text" : "JULIO",
293
+ "whiteSpace" : false
294
+ }, {
295
+ "id" : "s0-t46",
296
+ "begin" : 291,
297
+ "end" : 294,
298
+ "text" : "C.C",
299
+ "whiteSpace" : false
300
+ }, {
301
+ "id" : "s0-t47",
302
+ "begin" : 295,
303
+ "end" : 301,
304
+ "text" : "194459",
305
+ "whiteSpace" : false
306
+ }, {
307
+ "id" : "s0-t48",
308
+ "begin" : 302,
309
+ "end" : 305,
310
+ "text" : "NIT",
311
+ "whiteSpace" : false
312
+ }, {
313
+ "id" : "s0-t49",
314
+ "begin" : 305,
315
+ "end" : 306,
316
+ "text" : ":",
317
+ "whiteSpace" : false
318
+ }, {
319
+ "id" : "s0-t50",
320
+ "begin" : 307,
321
+ "end" : 315,
322
+ "text" : "194459-9",
323
+ "whiteSpace" : false
324
+ }, {
325
+ "id" : "s0-t51",
326
+ "begin" : 316,
327
+ "end" : 325,
328
+ "text" : "CERTIFICA",
329
+ "whiteSpace" : false
330
+ }, {
331
+ "id" : "s0-t52",
332
+ "begin" : 325,
333
+ "end" : 326,
334
+ "text" : ":",
335
+ "whiteSpace" : false
336
+ }, {
337
+ "id" : "s0-t53",
338
+ "begin" : 327,
339
+ "end" : 336,
340
+ "text" : "MATRICULA",
341
+ "whiteSpace" : false
342
+ }, {
343
+ "id" : "s0-t54",
344
+ "begin" : 337,
345
+ "end" : 339,
346
+ "text" : "NO",
347
+ "whiteSpace" : false
348
+ }, {
349
+ "id" : "s0-t55",
350
+ "begin" : 340,
351
+ "end" : 348,
352
+ "text" : "00784836",
353
+ "whiteSpace" : false
354
+ }, {
355
+ "id" : "s0-t56",
356
+ "begin" : 349,
357
+ "end" : 352,
358
+ "text" : "DEL",
359
+ "whiteSpace" : false
360
+ }, {
361
+ "id" : "s0-t57",
362
+ "begin" : 353,
363
+ "end" : 355,
364
+ "text" : "23",
365
+ "whiteSpace" : false
366
+ }, {
367
+ "id" : "s0-t58",
368
+ "begin" : 356,
369
+ "end" : 358,
370
+ "text" : "DE",
371
+ "whiteSpace" : false
372
+ }, {
373
+ "id" : "s0-t59",
374
+ "begin" : 359,
375
+ "end" : 364,
376
+ "text" : "ABRIL",
377
+ "whiteSpace" : false
378
+ }, {
379
+ "id" : "s0-t60",
380
+ "begin" : 365,
381
+ "end" : 367,
382
+ "text" : "DE",
383
+ "whiteSpace" : false
384
+ }, {
385
+ "id" : "s0-t61",
386
+ "begin" : 368,
387
+ "end" : 372,
388
+ "text" : "1997",
389
+ "whiteSpace" : false
390
+ }, {
391
+ "id" : "s0-t62",
392
+ "begin" : 373,
393
+ "end" : 382,
394
+ "text" : "CERTIEICA",
395
+ "whiteSpace" : false
396
+ }, {
397
+ "id" : "s0-t63",
398
+ "begin" : 383,
399
+ "end" : 392,
400
+ "text" : "DIRECCION",
401
+ "whiteSpace" : false
402
+ }, {
403
+ "id" : "s0-t64",
404
+ "begin" : 393,
405
+ "end" : 395,
406
+ "text" : "DE",
407
+ "whiteSpace" : false
408
+ }, {
409
+ "id" : "s0-t65",
410
+ "begin" : 396,
411
+ "end" : 398,
412
+ "text" : "NO",
413
+ "whiteSpace" : false
414
+ }, {
415
+ "id" : "s0-t66",
416
+ "begin" : 399,
417
+ "end" : 408,
418
+ "text" : "IFICACION",
419
+ "whiteSpace" : false
420
+ }, {
421
+ "id" : "s0-t67",
422
+ "begin" : 409,
423
+ "end" : 417,
424
+ "text" : "JUDICIAL",
425
+ "whiteSpace" : false
426
+ }, {
427
+ "id" : "s0-t68",
428
+ "begin" : 418,
429
+ "end" : 419,
430
+ "text" : ":",
431
+ "whiteSpace" : false
432
+ }, {
433
+ "id" : "s0-t69",
434
+ "begin" : 420,
435
+ "end" : 422,
436
+ "text" : "CL",
437
+ "whiteSpace" : false
438
+ }, {
439
+ "id" : "s0-t70",
440
+ "begin" : 423,
441
+ "end" : 425,
442
+ "text" : "54",
443
+ "whiteSpace" : false
444
+ }, {
445
+ "id" : "s0-t71",
446
+ "begin" : 426,
447
+ "end" : 429,
448
+ "text" : "SUR",
449
+ "whiteSpace" : false
450
+ }, {
451
+ "id" : "s0-t72",
452
+ "begin" : 430,
453
+ "end" : 432,
454
+ "text" : "No",
455
+ "whiteSpace" : false
456
+ }, {
457
+ "id" : "s0-t73",
458
+ "begin" : 434,
459
+ "end" : 440,
460
+ "text" : "87G-02",
461
+ "whiteSpace" : false
462
+ }, {
463
+ "id" : "s0-t74",
464
+ "begin" : 441,
465
+ "end" : 450,
466
+ "text" : "MUNICIPIO",
467
+ "whiteSpace" : false
468
+ }, {
469
+ "id" : "s0-t75",
470
+ "begin" : 451,
471
+ "end" : 457,
472
+ "text" : "BOGOTA",
473
+ "whiteSpace" : false
474
+ }, {
475
+ "id" : "s0-t76",
476
+ "begin" : 458,
477
+ "end" : 462,
478
+ "text" : "D.C.",
479
+ "whiteSpace" : false
480
+ }, {
481
+ "id" : "s0-t77",
482
+ "begin" : 463,
483
+ "end" : 468,
484
+ "text" : "EMAIL",
485
+ "whiteSpace" : false
486
+ }, {
487
+ "id" : "s0-t78",
488
+ "begin" : 469,
489
+ "end" : 481,
490
+ "text" : "NOTIFICACİON",
491
+ "whiteSpace" : false
492
+ }, {
493
+ "id" : "s0-t79",
494
+ "begin" : 482,
495
+ "end" : 490,
496
+ "text" : "JUDICIAL",
497
+ "whiteSpace" : false
498
+ }, {
499
+ "id" : "s0-t80",
500
+ "begin" : 491,
501
+ "end" : 492,
502
+ "text" : ":",
503
+ "whiteSpace" : false
504
+ }, {
505
+ "id" : "s0-t81",
506
+ "begin" : 493,
507
+ "end" : 513,
508
+ "text" : "CARPINTERIAMETALICAS",
509
+ "whiteSpace" : false
510
+ }, {
511
+ "id" : "s0-t82",
512
+ "begin" : 514,
513
+ "end" : 534,
514
+ "text" : "RODRIGUEZ@HOTMAIL.CO",
515
+ "whiteSpace" : false
516
+ }, {
517
+ "id" : "s0-t83",
518
+ "begin" : 535,
519
+ "end" : 544,
520
+ "text" : "DIRECCION",
521
+ "whiteSpace" : false
522
+ }, {
523
+ "id" : "s0-t84",
524
+ "begin" : 545,
525
+ "end" : 554,
526
+ "text" : "COMERCIAL",
527
+ "whiteSpace" : false
528
+ }, {
529
+ "id" : "s0-t85",
530
+ "begin" : 555,
531
+ "end" : 557,
532
+ "text" : "DG",
533
+ "whiteSpace" : false
534
+ }, {
535
+ "id" : "s0-t86",
536
+ "begin" : 558,
537
+ "end" : 560,
538
+ "text" : "54",
539
+ "whiteSpace" : false
540
+ }, {
541
+ "id" : "s0-t87",
542
+ "begin" : 561,
543
+ "end" : 563,
544
+ "text" : "NO",
545
+ "whiteSpace" : false
546
+ }, {
547
+ "id" : "s0-t88",
548
+ "begin" : 565,
549
+ "end" : 571,
550
+ "text" : "87G-12",
551
+ "whiteSpace" : false
552
+ }, {
553
+ "id" : "s0-t89",
554
+ "begin" : 572,
555
+ "end" : 581,
556
+ "text" : "MUNICIPIO",
557
+ "whiteSpace" : false
558
+ }, {
559
+ "id" : "s0-t90",
560
+ "begin" : 582,
561
+ "end" : 583,
562
+ "text" : ":",
563
+ "whiteSpace" : false
564
+ }, {
565
+ "id" : "s0-t91",
566
+ "begin" : 584,
567
+ "end" : 590,
568
+ "text" : "BOGOTA",
569
+ "whiteSpace" : false
570
+ }, {
571
+ "id" : "s0-t92",
572
+ "begin" : 591,
573
+ "end" : 594,
574
+ "text" : "D.C",
575
+ "whiteSpace" : false
576
+ }, {
577
+ "id" : "s0-t93",
578
+ "begin" : 595,
579
+ "end" : 600,
580
+ "text" : "EMAIL",
581
+ "whiteSpace" : false
582
+ }, {
583
+ "id" : "s0-t94",
584
+ "begin" : 601,
585
+ "end" : 610,
586
+ "text" : "COMERCIAL",
587
+ "whiteSpace" : false
588
+ }, {
589
+ "id" : "s0-t95",
590
+ "begin" : 610,
591
+ "end" : 611,
592
+ "text" : ":",
593
+ "whiteSpace" : false
594
+ }, {
595
+ "id" : "s0-t96",
596
+ "begin" : 612,
597
+ "end" : 652,
598
+ "text" : "CARPINTERIAMETALICASRODRIGUEZCHOTMAIL.CO",
599
+ "whiteSpace" : false
600
+ }, {
601
+ "id" : "s0-t97",
602
+ "begin" : 653,
603
+ "end" : 665,
604
+ "text" : "*ADVERTENCIA",
605
+ "whiteSpace" : false
606
+ }, {
607
+ "id" : "s0-t98",
608
+ "begin" : 666,
609
+ "end" : 671,
610
+ "text" : "ESTOS",
611
+ "whiteSpace" : false
612
+ }, {
613
+ "id" : "s0-t99",
614
+ "begin" : 672,
615
+ "end" : 677,
616
+ "text" : "DATOS",
617
+ "whiteSpace" : false
618
+ }, {
619
+ "id" : "s0-t100",
620
+ "begin" : 678,
621
+ "end" : 690,
622
+ "text" : "CORRESPONDEN",
623
+ "whiteSpace" : false
624
+ }, {
625
+ "id" : "s0-t101",
626
+ "begin" : 691,
627
+ "end" : 692,
628
+ "text" : "A",
629
+ "whiteSpace" : false
630
+ }, {
631
+ "id" : "s0-t102",
632
+ "begin" : 693,
633
+ "end" : 695,
634
+ "text" : "LA",
635
+ "whiteSpace" : false
636
+ }, {
637
+ "id" : "s0-t103",
638
+ "begin" : 696,
639
+ "end" : 702,
640
+ "text" : "ULTIMA",
641
+ "whiteSpace" : false
642
+ }, {
643
+ "id" : "s0-t104",
644
+ "begin" : 703,
645
+ "end" : 714,
646
+ "text" : "INFORMACION",
647
+ "whiteSpace" : false
648
+ }, {
649
+ "id" : "s0-t105",
650
+ "begin" : 715,
651
+ "end" : 717,
652
+ "text" : "**",
653
+ "whiteSpace" : false
654
+ }, {
655
+ "id" : "s0-t106",
656
+ "begin" : 718,
657
+ "end" : 730,
658
+ "text" : "SUMINISTRADA",
659
+ "whiteSpace" : false
660
+ }, {
661
+ "id" : "s0-t107",
662
+ "begin" : 731,
663
+ "end" : 734,
664
+ "text" : "POR",
665
+ "whiteSpace" : false
666
+ }, {
667
+ "id" : "s0-t108",
668
+ "begin" : 735,
669
+ "end" : 737,
670
+ "text" : "EL",
671
+ "whiteSpace" : false
672
+ }, {
673
+ "id" : "s0-t109",
674
+ "begin" : 738,
675
+ "end" : 749,
676
+ "text" : "COMERCIANTE",
677
+ "whiteSpace" : false
678
+ }, {
679
+ "id" : "s0-t110",
680
+ "begin" : 750,
681
+ "end" : 752,
682
+ "text" : "EN",
683
+ "whiteSpace" : false
684
+ }, {
685
+ "id" : "s0-t111",
686
+ "begin" : 753,
687
+ "end" : 755,
688
+ "text" : "EL",
689
+ "whiteSpace" : false
690
+ }, {
691
+ "id" : "s0-t112",
692
+ "begin" : 756,
693
+ "end" : 766,
694
+ "text" : "FORMULARIO",
695
+ "whiteSpace" : false
696
+ }, {
697
+ "id" : "s0-t113",
698
+ "begin" : 767,
699
+ "end" : 769,
700
+ "text" : "DE",
701
+ "whiteSpace" : false
702
+ }, {
703
+ "id" : "s0-t114",
704
+ "begin" : 770,
705
+ "end" : 780,
706
+ "text" : "MATRICULA*",
707
+ "whiteSpace" : false
708
+ }, {
709
+ "id" : "s0-t115",
710
+ "begin" : 781,
711
+ "end" : 784,
712
+ "text" : "Y/O",
713
+ "whiteSpace" : false
714
+ }, {
715
+ "id" : "s0-t116",
716
+ "begin" : 785,
717
+ "end" : 795,
718
+ "text" : "RENOVACION",
719
+ "whiteSpace" : false
720
+ }, {
721
+ "id" : "s0-t117",
722
+ "begin" : 796,
723
+ "end" : 799,
724
+ "text" : "DEL",
725
+ "whiteSpace" : false
726
+ }, {
727
+ "id" : "s0-t118",
728
+ "begin" : 800,
729
+ "end" : 803,
730
+ "text" : "AÑO",
731
+ "whiteSpace" : false
732
+ }, {
733
+ "id" : "s0-t119",
734
+ "begin" : 804,
735
+ "end" : 808,
736
+ "text" : "2010",
737
+ "whiteSpace" : false
738
+ }, {
739
+ "id" : "s0-t120",
740
+ "begin" : 809,
741
+ "end" : 818,
742
+ "text" : "CERTIEICA",
743
+ "whiteSpace" : false
744
+ }, {
745
+ "id" : "s0-t121",
746
+ "begin" : 818,
747
+ "end" : 819,
748
+ "text" : ":",
749
+ "whiteSpace" : false
750
+ }, {
751
+ "id" : "s0-t122",
752
+ "begin" : 820,
753
+ "end" : 823,
754
+ "text" : "QUE",
755
+ "whiteSpace" : false
756
+ }, {
757
+ "id" : "s0-t123",
758
+ "begin" : 824,
759
+ "end" : 826,
760
+ "text" : "EL",
761
+ "whiteSpace" : false
762
+ }, {
763
+ "id" : "s0-t124",
764
+ "begin" : 827,
765
+ "end" : 838,
766
+ "text" : "COMERCIANTE",
767
+ "whiteSpace" : false
768
+ }, {
769
+ "id" : "s0-t125",
770
+ "begin" : 839,
771
+ "end" : 841,
772
+ "text" : "NO",
773
+ "whiteSpace" : false
774
+ }, {
775
+ "id" : "s0-t126",
776
+ "begin" : 842,
777
+ "end" : 844,
778
+ "text" : "HA",
779
+ "whiteSpace" : false
780
+ }, {
781
+ "id" : "s0-t127",
782
+ "begin" : 845,
783
+ "end" : 853,
784
+ "text" : "CUMPLIDO",
785
+ "whiteSpace" : false
786
+ }, {
787
+ "id" : "s0-t128",
788
+ "begin" : 854,
789
+ "end" : 857,
790
+ "text" : "CON",
791
+ "whiteSpace" : false
792
+ }, {
793
+ "id" : "s0-t129",
794
+ "begin" : 858,
795
+ "end" : 860,
796
+ "text" : "LA",
797
+ "whiteSpace" : false
798
+ }, {
799
+ "id" : "s0-t130",
800
+ "begin" : 861,
801
+ "end" : 871,
802
+ "text" : "OBLIGACION",
803
+ "whiteSpace" : false
804
+ }, {
805
+ "id" : "s0-t131",
806
+ "begin" : 872,
807
+ "end" : 877,
808
+ "text" : "LEGAL",
809
+ "whiteSpace" : false
810
+ }, {
811
+ "id" : "s0-t132",
812
+ "begin" : 878,
813
+ "end" : 880,
814
+ "text" : "DE",
815
+ "whiteSpace" : false
816
+ }, {
817
+ "id" : "s0-t133",
818
+ "begin" : 881,
819
+ "end" : 888,
820
+ "text" : "RENOVAR",
821
+ "whiteSpace" : false
822
+ }, {
823
+ "id" : "s0-t134",
824
+ "begin" : 889,
825
+ "end" : 891,
826
+ "text" : "SU",
827
+ "whiteSpace" : false
828
+ }, {
829
+ "id" : "s0-t135",
830
+ "begin" : 892,
831
+ "end" : 901,
832
+ "text" : "MATRICULA",
833
+ "whiteSpace" : false
834
+ }, {
835
+ "id" : "s0-t136",
836
+ "begin" : 902,
837
+ "end" : 911,
838
+ "text" : "MERCANTIL",
839
+ "whiteSpace" : false
840
+ }, {
841
+ "id" : "s0-t137",
842
+ "begin" : 912,
843
+ "end" : 917,
844
+ "text" : "DESDE",
845
+ "whiteSpace" : false
846
+ }, {
847
+ "id" : "s0-t138",
848
+ "begin" : 918,
849
+ "end" : 922,
850
+ "text" : "2011",
851
+ "whiteSpace" : false
852
+ }, {
853
+ "id" : "s0-t139",
854
+ "begin" : 923,
855
+ "end" : 932,
856
+ "text" : "CERTIFICA",
857
+ "whiteSpace" : false
858
+ }, {
859
+ "id" : "s0-t140",
860
+ "begin" : 933,
861
+ "end" : 943,
862
+ "text" : "RENOVACION",
863
+ "whiteSpace" : false
864
+ }, {
865
+ "id" : "s0-t141",
866
+ "begin" : 944,
867
+ "end" : 946,
868
+ "text" : "DE",
869
+ "whiteSpace" : false
870
+ }, {
871
+ "id" : "s0-t142",
872
+ "begin" : 947,
873
+ "end" : 949,
874
+ "text" : "LA",
875
+ "whiteSpace" : false
876
+ }, {
877
+ "id" : "s0-t143",
878
+ "begin" : 950,
879
+ "end" : 959,
880
+ "text" : "MATRÍCULA",
881
+ "whiteSpace" : false
882
+ }, {
883
+ "id" : "s0-t144",
884
+ "begin" : 960,
885
+ "end" : 961,
886
+ "text" : ":",
887
+ "whiteSpace" : false
888
+ } ]
889
+ }, {
890
+ "id" : "s1",
891
+ "begin" : 962,
892
+ "end" : 1822,
893
+ "text" : "EL 15 DE JUNIO DE 2010 ULTIMO AÑO RENOVADO2010 TOTAL ACTIVOS TOTAL ACTIVOS SIN AJUSTES POR INFLACION 95,000,000 ACTIVIDAD ECONOMICA FABRICACION ELEMENTOS METALICOS, ORNAMENTACION PUERTAS, VENTANAS, REJAS Y VERJAS / ALQUILER DE VEHICULOS DE CARGA CON CONDUCTOR : 120,000,000 CERTIFICA: PROPIETARIO DE LOS SIGUIENTES ESTABLECIMIENTOS DE Comercio NOMBRE CARPINTERIA METALICA RODRIGUEZ DIRECCION COMERCIAL : DG 54 SUR No 87G-02 MUNICIPIO : BOGOTA D.C MATRICULA NO 00784839 DE 23 DE ABRIL DE 1997 RENOVACION DE LA MATRICULA EL 15 DE JUNIO DE 2010 ULTIMO AÑO RENOVADO 2010 CERTIFICA LA INFORMACION ANTERIOR HA SIDO TOMADA DIRECTAMENTE DEL FORMULARIO DE MATRICULA DILIGENCIADO POR EL COMERCIANTE DE CONFORMIDAD CON LO ESTABLECIDO POR LA LEY 962 DE 2005, LOS ACTOS DE REGISTRO-AQUI-CERTIFICADOS-'QUEDAN EN-FIRME, CINCO-H DIAS-HABI DESPUES DE LA FECHA DE INSCRIPCION,",
894
+ "tokens" : [ {
895
+ "id" : "s1-t0",
896
+ "begin" : 962,
897
+ "end" : 964,
898
+ "text" : "EL",
899
+ "whiteSpace" : false
900
+ }, {
901
+ "id" : "s1-t1",
902
+ "begin" : 965,
903
+ "end" : 967,
904
+ "text" : "15",
905
+ "whiteSpace" : false
906
+ }, {
907
+ "id" : "s1-t2",
908
+ "begin" : 968,
909
+ "end" : 970,
910
+ "text" : "DE",
911
+ "whiteSpace" : false
912
+ }, {
913
+ "id" : "s1-t3",
914
+ "begin" : 971,
915
+ "end" : 976,
916
+ "text" : "JUNIO",
917
+ "whiteSpace" : false
918
+ }, {
919
+ "id" : "s1-t4",
920
+ "begin" : 977,
921
+ "end" : 979,
922
+ "text" : "DE",
923
+ "whiteSpace" : false
924
+ }, {
925
+ "id" : "s1-t5",
926
+ "begin" : 980,
927
+ "end" : 984,
928
+ "text" : "2010",
929
+ "whiteSpace" : false
930
+ }, {
931
+ "id" : "s1-t6",
932
+ "begin" : 985,
933
+ "end" : 991,
934
+ "text" : "ULTIMO",
935
+ "whiteSpace" : false
936
+ }, {
937
+ "id" : "s1-t7",
938
+ "begin" : 992,
939
+ "end" : 995,
940
+ "text" : "AÑO",
941
+ "whiteSpace" : false
942
+ }, {
943
+ "id" : "s1-t8",
944
+ "begin" : 996,
945
+ "end" : 1008,
946
+ "text" : "RENOVADO2010",
947
+ "whiteSpace" : false
948
+ }, {
949
+ "id" : "s1-t9",
950
+ "begin" : 1009,
951
+ "end" : 1014,
952
+ "text" : "TOTAL",
953
+ "whiteSpace" : false
954
+ }, {
955
+ "id" : "s1-t10",
956
+ "begin" : 1015,
957
+ "end" : 1022,
958
+ "text" : "ACTIVOS",
959
+ "whiteSpace" : false
960
+ }, {
961
+ "id" : "s1-t11",
962
+ "begin" : 1023,
963
+ "end" : 1028,
964
+ "text" : "TOTAL",
965
+ "whiteSpace" : false
966
+ }, {
967
+ "id" : "s1-t12",
968
+ "begin" : 1029,
969
+ "end" : 1036,
970
+ "text" : "ACTIVOS",
971
+ "whiteSpace" : false
972
+ }, {
973
+ "id" : "s1-t13",
974
+ "begin" : 1037,
975
+ "end" : 1040,
976
+ "text" : "SIN",
977
+ "whiteSpace" : false
978
+ }, {
979
+ "id" : "s1-t14",
980
+ "begin" : 1041,
981
+ "end" : 1048,
982
+ "text" : "AJUSTES",
983
+ "whiteSpace" : false
984
+ }, {
985
+ "id" : "s1-t15",
986
+ "begin" : 1049,
987
+ "end" : 1052,
988
+ "text" : "POR",
989
+ "whiteSpace" : false
990
+ }, {
991
+ "id" : "s1-t16",
992
+ "begin" : 1053,
993
+ "end" : 1062,
994
+ "text" : "INFLACION",
995
+ "whiteSpace" : false
996
+ }, {
997
+ "id" : "s1-t17",
998
+ "begin" : 1063,
999
+ "end" : 1073,
1000
+ "text" : "95,000,000",
1001
+ "whiteSpace" : false
1002
+ }, {
1003
+ "id" : "s1-t18",
1004
+ "begin" : 1074,
1005
+ "end" : 1083,
1006
+ "text" : "ACTIVIDAD",
1007
+ "whiteSpace" : false
1008
+ }, {
1009
+ "id" : "s1-t19",
1010
+ "begin" : 1084,
1011
+ "end" : 1093,
1012
+ "text" : "ECONOMICA",
1013
+ "whiteSpace" : false
1014
+ }, {
1015
+ "id" : "s1-t20",
1016
+ "begin" : 1094,
1017
+ "end" : 1105,
1018
+ "text" : "FABRICACION",
1019
+ "whiteSpace" : false
1020
+ }, {
1021
+ "id" : "s1-t21",
1022
+ "begin" : 1106,
1023
+ "end" : 1115,
1024
+ "text" : "ELEMENTOS",
1025
+ "whiteSpace" : false
1026
+ }, {
1027
+ "id" : "s1-t22",
1028
+ "begin" : 1116,
1029
+ "end" : 1125,
1030
+ "text" : "METALICOS",
1031
+ "whiteSpace" : false
1032
+ }, {
1033
+ "id" : "s1-t23",
1034
+ "begin" : 1125,
1035
+ "end" : 1126,
1036
+ "text" : ",",
1037
+ "whiteSpace" : false
1038
+ }, {
1039
+ "id" : "s1-t24",
1040
+ "begin" : 1127,
1041
+ "end" : 1140,
1042
+ "text" : "ORNAMENTACION",
1043
+ "whiteSpace" : false
1044
+ }, {
1045
+ "id" : "s1-t25",
1046
+ "begin" : 1141,
1047
+ "end" : 1148,
1048
+ "text" : "PUERTAS",
1049
+ "whiteSpace" : false
1050
+ }, {
1051
+ "id" : "s1-t26",
1052
+ "begin" : 1148,
1053
+ "end" : 1149,
1054
+ "text" : ",",
1055
+ "whiteSpace" : false
1056
+ }, {
1057
+ "id" : "s1-t27",
1058
+ "begin" : 1150,
1059
+ "end" : 1158,
1060
+ "text" : "VENTANAS",
1061
+ "whiteSpace" : false
1062
+ }, {
1063
+ "id" : "s1-t28",
1064
+ "begin" : 1158,
1065
+ "end" : 1159,
1066
+ "text" : ",",
1067
+ "whiteSpace" : false
1068
+ }, {
1069
+ "id" : "s1-t29",
1070
+ "begin" : 1160,
1071
+ "end" : 1165,
1072
+ "text" : "REJAS",
1073
+ "whiteSpace" : false
1074
+ }, {
1075
+ "id" : "s1-t30",
1076
+ "begin" : 1166,
1077
+ "end" : 1167,
1078
+ "text" : "Y",
1079
+ "whiteSpace" : false
1080
+ }, {
1081
+ "id" : "s1-t31",
1082
+ "begin" : 1168,
1083
+ "end" : 1174,
1084
+ "text" : "VERJAS",
1085
+ "whiteSpace" : false
1086
+ }, {
1087
+ "id" : "s1-t32",
1088
+ "begin" : 1175,
1089
+ "end" : 1176,
1090
+ "text" : "/",
1091
+ "whiteSpace" : false
1092
+ }, {
1093
+ "id" : "s1-t33",
1094
+ "begin" : 1177,
1095
+ "end" : 1185,
1096
+ "text" : "ALQUILER",
1097
+ "whiteSpace" : false
1098
+ }, {
1099
+ "id" : "s1-t34",
1100
+ "begin" : 1186,
1101
+ "end" : 1188,
1102
+ "text" : "DE",
1103
+ "whiteSpace" : false
1104
+ }, {
1105
+ "id" : "s1-t35",
1106
+ "begin" : 1189,
1107
+ "end" : 1198,
1108
+ "text" : "VEHICULOS",
1109
+ "whiteSpace" : false
1110
+ }, {
1111
+ "id" : "s1-t36",
1112
+ "begin" : 1199,
1113
+ "end" : 1201,
1114
+ "text" : "DE",
1115
+ "whiteSpace" : false
1116
+ }, {
1117
+ "id" : "s1-t37",
1118
+ "begin" : 1202,
1119
+ "end" : 1207,
1120
+ "text" : "CARGA",
1121
+ "whiteSpace" : false
1122
+ }, {
1123
+ "id" : "s1-t38",
1124
+ "begin" : 1208,
1125
+ "end" : 1211,
1126
+ "text" : "CON",
1127
+ "whiteSpace" : false
1128
+ }, {
1129
+ "id" : "s1-t39",
1130
+ "begin" : 1212,
1131
+ "end" : 1221,
1132
+ "text" : "CONDUCTOR",
1133
+ "whiteSpace" : false
1134
+ }, {
1135
+ "id" : "s1-t40",
1136
+ "begin" : 1222,
1137
+ "end" : 1223,
1138
+ "text" : ":",
1139
+ "whiteSpace" : false
1140
+ }, {
1141
+ "id" : "s1-t41",
1142
+ "begin" : 1224,
1143
+ "end" : 1235,
1144
+ "text" : "120,000,000",
1145
+ "whiteSpace" : false
1146
+ }, {
1147
+ "id" : "s1-t42",
1148
+ "begin" : 1236,
1149
+ "end" : 1245,
1150
+ "text" : "CERTIFICA",
1151
+ "whiteSpace" : false
1152
+ }, {
1153
+ "id" : "s1-t43",
1154
+ "begin" : 1245,
1155
+ "end" : 1246,
1156
+ "text" : ":",
1157
+ "whiteSpace" : false
1158
+ }, {
1159
+ "id" : "s1-t44",
1160
+ "begin" : 1247,
1161
+ "end" : 1258,
1162
+ "text" : "PROPIETARIO",
1163
+ "whiteSpace" : false
1164
+ }, {
1165
+ "id" : "s1-t45",
1166
+ "begin" : 1259,
1167
+ "end" : 1261,
1168
+ "text" : "DE",
1169
+ "whiteSpace" : false
1170
+ }, {
1171
+ "id" : "s1-t46",
1172
+ "begin" : 1262,
1173
+ "end" : 1265,
1174
+ "text" : "LOS",
1175
+ "whiteSpace" : false
1176
+ }, {
1177
+ "id" : "s1-t47",
1178
+ "begin" : 1266,
1179
+ "end" : 1276,
1180
+ "text" : "SIGUIENTES",
1181
+ "whiteSpace" : false
1182
+ }, {
1183
+ "id" : "s1-t48",
1184
+ "begin" : 1277,
1185
+ "end" : 1293,
1186
+ "text" : "ESTABLECIMIENTOS",
1187
+ "whiteSpace" : false
1188
+ }, {
1189
+ "id" : "s1-t49",
1190
+ "begin" : 1294,
1191
+ "end" : 1296,
1192
+ "text" : "DE",
1193
+ "whiteSpace" : false
1194
+ }, {
1195
+ "id" : "s1-t50",
1196
+ "begin" : 1297,
1197
+ "end" : 1305,
1198
+ "text" : "Comercio",
1199
+ "whiteSpace" : false
1200
+ }, {
1201
+ "id" : "s1-t51",
1202
+ "begin" : 1306,
1203
+ "end" : 1312,
1204
+ "text" : "NOMBRE",
1205
+ "whiteSpace" : false
1206
+ }, {
1207
+ "id" : "s1-t52",
1208
+ "begin" : 1313,
1209
+ "end" : 1324,
1210
+ "text" : "CARPINTERIA",
1211
+ "whiteSpace" : false
1212
+ }, {
1213
+ "id" : "s1-t53",
1214
+ "begin" : 1325,
1215
+ "end" : 1333,
1216
+ "text" : "METALICA",
1217
+ "whiteSpace" : false
1218
+ }, {
1219
+ "id" : "s1-t54",
1220
+ "begin" : 1334,
1221
+ "end" : 1343,
1222
+ "text" : "RODRIGUEZ",
1223
+ "whiteSpace" : false
1224
+ }, {
1225
+ "id" : "s1-t55",
1226
+ "begin" : 1344,
1227
+ "end" : 1353,
1228
+ "text" : "DIRECCION",
1229
+ "whiteSpace" : false
1230
+ }, {
1231
+ "id" : "s1-t56",
1232
+ "begin" : 1354,
1233
+ "end" : 1363,
1234
+ "text" : "COMERCIAL",
1235
+ "whiteSpace" : false
1236
+ }, {
1237
+ "id" : "s1-t57",
1238
+ "begin" : 1364,
1239
+ "end" : 1365,
1240
+ "text" : ":",
1241
+ "whiteSpace" : false
1242
+ }, {
1243
+ "id" : "s1-t58",
1244
+ "begin" : 1366,
1245
+ "end" : 1368,
1246
+ "text" : "DG",
1247
+ "whiteSpace" : false
1248
+ }, {
1249
+ "id" : "s1-t59",
1250
+ "begin" : 1369,
1251
+ "end" : 1371,
1252
+ "text" : "54",
1253
+ "whiteSpace" : false
1254
+ }, {
1255
+ "id" : "s1-t60",
1256
+ "begin" : 1372,
1257
+ "end" : 1375,
1258
+ "text" : "SUR",
1259
+ "whiteSpace" : false
1260
+ }, {
1261
+ "id" : "s1-t61",
1262
+ "begin" : 1376,
1263
+ "end" : 1378,
1264
+ "text" : "No",
1265
+ "whiteSpace" : false
1266
+ }, {
1267
+ "id" : "s1-t62",
1268
+ "begin" : 1380,
1269
+ "end" : 1386,
1270
+ "text" : "87G-02",
1271
+ "whiteSpace" : false
1272
+ }, {
1273
+ "id" : "s1-t63",
1274
+ "begin" : 1387,
1275
+ "end" : 1396,
1276
+ "text" : "MUNICIPIO",
1277
+ "whiteSpace" : false
1278
+ }, {
1279
+ "id" : "s1-t64",
1280
+ "begin" : 1397,
1281
+ "end" : 1398,
1282
+ "text" : ":",
1283
+ "whiteSpace" : false
1284
+ }, {
1285
+ "id" : "s1-t65",
1286
+ "begin" : 1399,
1287
+ "end" : 1405,
1288
+ "text" : "BOGOTA",
1289
+ "whiteSpace" : false
1290
+ }, {
1291
+ "id" : "s1-t66",
1292
+ "begin" : 1406,
1293
+ "end" : 1409,
1294
+ "text" : "D.C",
1295
+ "whiteSpace" : false
1296
+ }, {
1297
+ "id" : "s1-t67",
1298
+ "begin" : 1410,
1299
+ "end" : 1419,
1300
+ "text" : "MATRICULA",
1301
+ "whiteSpace" : false
1302
+ }, {
1303
+ "id" : "s1-t68",
1304
+ "begin" : 1420,
1305
+ "end" : 1422,
1306
+ "text" : "NO",
1307
+ "whiteSpace" : false
1308
+ }, {
1309
+ "id" : "s1-t69",
1310
+ "begin" : 1423,
1311
+ "end" : 1431,
1312
+ "text" : "00784839",
1313
+ "whiteSpace" : false
1314
+ }, {
1315
+ "id" : "s1-t70",
1316
+ "begin" : 1432,
1317
+ "end" : 1434,
1318
+ "text" : "DE",
1319
+ "whiteSpace" : false
1320
+ }, {
1321
+ "id" : "s1-t71",
1322
+ "begin" : 1435,
1323
+ "end" : 1437,
1324
+ "text" : "23",
1325
+ "whiteSpace" : false
1326
+ }, {
1327
+ "id" : "s1-t72",
1328
+ "begin" : 1438,
1329
+ "end" : 1440,
1330
+ "text" : "DE",
1331
+ "whiteSpace" : false
1332
+ }, {
1333
+ "id" : "s1-t73",
1334
+ "begin" : 1441,
1335
+ "end" : 1446,
1336
+ "text" : "ABRIL",
1337
+ "whiteSpace" : false
1338
+ }, {
1339
+ "id" : "s1-t74",
1340
+ "begin" : 1447,
1341
+ "end" : 1449,
1342
+ "text" : "DE",
1343
+ "whiteSpace" : false
1344
+ }, {
1345
+ "id" : "s1-t75",
1346
+ "begin" : 1450,
1347
+ "end" : 1454,
1348
+ "text" : "1997",
1349
+ "whiteSpace" : false
1350
+ }, {
1351
+ "id" : "s1-t76",
1352
+ "begin" : 1455,
1353
+ "end" : 1465,
1354
+ "text" : "RENOVACION",
1355
+ "whiteSpace" : false
1356
+ }, {
1357
+ "id" : "s1-t77",
1358
+ "begin" : 1466,
1359
+ "end" : 1468,
1360
+ "text" : "DE",
1361
+ "whiteSpace" : false
1362
+ }, {
1363
+ "id" : "s1-t78",
1364
+ "begin" : 1469,
1365
+ "end" : 1471,
1366
+ "text" : "LA",
1367
+ "whiteSpace" : false
1368
+ }, {
1369
+ "id" : "s1-t79",
1370
+ "begin" : 1472,
1371
+ "end" : 1481,
1372
+ "text" : "MATRICULA",
1373
+ "whiteSpace" : false
1374
+ }, {
1375
+ "id" : "s1-t80",
1376
+ "begin" : 1482,
1377
+ "end" : 1484,
1378
+ "text" : "EL",
1379
+ "whiteSpace" : false
1380
+ }, {
1381
+ "id" : "s1-t81",
1382
+ "begin" : 1485,
1383
+ "end" : 1487,
1384
+ "text" : "15",
1385
+ "whiteSpace" : false
1386
+ }, {
1387
+ "id" : "s1-t82",
1388
+ "begin" : 1488,
1389
+ "end" : 1490,
1390
+ "text" : "DE",
1391
+ "whiteSpace" : false
1392
+ }, {
1393
+ "id" : "s1-t83",
1394
+ "begin" : 1491,
1395
+ "end" : 1496,
1396
+ "text" : "JUNIO",
1397
+ "whiteSpace" : false
1398
+ }, {
1399
+ "id" : "s1-t84",
1400
+ "begin" : 1497,
1401
+ "end" : 1499,
1402
+ "text" : "DE",
1403
+ "whiteSpace" : false
1404
+ }, {
1405
+ "id" : "s1-t85",
1406
+ "begin" : 1500,
1407
+ "end" : 1504,
1408
+ "text" : "2010",
1409
+ "whiteSpace" : false
1410
+ }, {
1411
+ "id" : "s1-t86",
1412
+ "begin" : 1505,
1413
+ "end" : 1511,
1414
+ "text" : "ULTIMO",
1415
+ "whiteSpace" : false
1416
+ }, {
1417
+ "id" : "s1-t87",
1418
+ "begin" : 1512,
1419
+ "end" : 1515,
1420
+ "text" : "AÑO",
1421
+ "whiteSpace" : false
1422
+ }, {
1423
+ "id" : "s1-t88",
1424
+ "begin" : 1516,
1425
+ "end" : 1524,
1426
+ "text" : "RENOVADO",
1427
+ "whiteSpace" : false
1428
+ }, {
1429
+ "id" : "s1-t89",
1430
+ "begin" : 1525,
1431
+ "end" : 1529,
1432
+ "text" : "2010",
1433
+ "whiteSpace" : false
1434
+ }, {
1435
+ "id" : "s1-t90",
1436
+ "begin" : 1530,
1437
+ "end" : 1539,
1438
+ "text" : "CERTIFICA",
1439
+ "whiteSpace" : false
1440
+ }, {
1441
+ "id" : "s1-t91",
1442
+ "begin" : 1540,
1443
+ "end" : 1542,
1444
+ "text" : "LA",
1445
+ "whiteSpace" : false
1446
+ }, {
1447
+ "id" : "s1-t92",
1448
+ "begin" : 1543,
1449
+ "end" : 1554,
1450
+ "text" : "INFORMACION",
1451
+ "whiteSpace" : false
1452
+ }, {
1453
+ "id" : "s1-t93",
1454
+ "begin" : 1555,
1455
+ "end" : 1563,
1456
+ "text" : "ANTERIOR",
1457
+ "whiteSpace" : false
1458
+ }, {
1459
+ "id" : "s1-t94",
1460
+ "begin" : 1564,
1461
+ "end" : 1566,
1462
+ "text" : "HA",
1463
+ "whiteSpace" : false
1464
+ }, {
1465
+ "id" : "s1-t95",
1466
+ "begin" : 1567,
1467
+ "end" : 1571,
1468
+ "text" : "SIDO",
1469
+ "whiteSpace" : false
1470
+ }, {
1471
+ "id" : "s1-t96",
1472
+ "begin" : 1572,
1473
+ "end" : 1578,
1474
+ "text" : "TOMADA",
1475
+ "whiteSpace" : false
1476
+ }, {
1477
+ "id" : "s1-t97",
1478
+ "begin" : 1579,
1479
+ "end" : 1591,
1480
+ "text" : "DIRECTAMENTE",
1481
+ "whiteSpace" : false
1482
+ }, {
1483
+ "id" : "s1-t98",
1484
+ "begin" : 1592,
1485
+ "end" : 1595,
1486
+ "text" : "DEL",
1487
+ "whiteSpace" : false
1488
+ }, {
1489
+ "id" : "s1-t99",
1490
+ "begin" : 1596,
1491
+ "end" : 1606,
1492
+ "text" : "FORMULARIO",
1493
+ "whiteSpace" : false
1494
+ }, {
1495
+ "id" : "s1-t100",
1496
+ "begin" : 1607,
1497
+ "end" : 1609,
1498
+ "text" : "DE",
1499
+ "whiteSpace" : false
1500
+ }, {
1501
+ "id" : "s1-t101",
1502
+ "begin" : 1610,
1503
+ "end" : 1619,
1504
+ "text" : "MATRICULA",
1505
+ "whiteSpace" : false
1506
+ }, {
1507
+ "id" : "s1-t102",
1508
+ "begin" : 1620,
1509
+ "end" : 1632,
1510
+ "text" : "DILIGENCIADO",
1511
+ "whiteSpace" : false
1512
+ }, {
1513
+ "id" : "s1-t103",
1514
+ "begin" : 1633,
1515
+ "end" : 1636,
1516
+ "text" : "POR",
1517
+ "whiteSpace" : false
1518
+ }, {
1519
+ "id" : "s1-t104",
1520
+ "begin" : 1637,
1521
+ "end" : 1639,
1522
+ "text" : "EL",
1523
+ "whiteSpace" : false
1524
+ }, {
1525
+ "id" : "s1-t105",
1526
+ "begin" : 1640,
1527
+ "end" : 1651,
1528
+ "text" : "COMERCIANTE",
1529
+ "whiteSpace" : false
1530
+ }, {
1531
+ "id" : "s1-t106",
1532
+ "begin" : 1653,
1533
+ "end" : 1655,
1534
+ "text" : "DE",
1535
+ "whiteSpace" : false
1536
+ }, {
1537
+ "id" : "s1-t107",
1538
+ "begin" : 1656,
1539
+ "end" : 1667,
1540
+ "text" : "CONFORMIDAD",
1541
+ "whiteSpace" : false
1542
+ }, {
1543
+ "id" : "s1-t108",
1544
+ "begin" : 1668,
1545
+ "end" : 1671,
1546
+ "text" : "CON",
1547
+ "whiteSpace" : false
1548
+ }, {
1549
+ "id" : "s1-t109",
1550
+ "begin" : 1672,
1551
+ "end" : 1674,
1552
+ "text" : "LO",
1553
+ "whiteSpace" : false
1554
+ }, {
1555
+ "id" : "s1-t110",
1556
+ "begin" : 1675,
1557
+ "end" : 1686,
1558
+ "text" : "ESTABLECIDO",
1559
+ "whiteSpace" : false
1560
+ }, {
1561
+ "id" : "s1-t111",
1562
+ "begin" : 1687,
1563
+ "end" : 1690,
1564
+ "text" : "POR",
1565
+ "whiteSpace" : false
1566
+ }, {
1567
+ "id" : "s1-t112",
1568
+ "begin" : 1691,
1569
+ "end" : 1693,
1570
+ "text" : "LA",
1571
+ "whiteSpace" : false
1572
+ }, {
1573
+ "id" : "s1-t113",
1574
+ "begin" : 1694,
1575
+ "end" : 1697,
1576
+ "text" : "LEY",
1577
+ "whiteSpace" : false
1578
+ }, {
1579
+ "id" : "s1-t114",
1580
+ "begin" : 1698,
1581
+ "end" : 1701,
1582
+ "text" : "962",
1583
+ "whiteSpace" : false
1584
+ }, {
1585
+ "id" : "s1-t115",
1586
+ "begin" : 1702,
1587
+ "end" : 1704,
1588
+ "text" : "DE",
1589
+ "whiteSpace" : false
1590
+ }, {
1591
+ "id" : "s1-t116",
1592
+ "begin" : 1705,
1593
+ "end" : 1709,
1594
+ "text" : "2005",
1595
+ "whiteSpace" : false
1596
+ }, {
1597
+ "id" : "s1-t117",
1598
+ "begin" : 1709,
1599
+ "end" : 1710,
1600
+ "text" : ",",
1601
+ "whiteSpace" : false
1602
+ }, {
1603
+ "id" : "s1-t118",
1604
+ "begin" : 1711,
1605
+ "end" : 1714,
1606
+ "text" : "LOS",
1607
+ "whiteSpace" : false
1608
+ }, {
1609
+ "id" : "s1-t119",
1610
+ "begin" : 1715,
1611
+ "end" : 1720,
1612
+ "text" : "ACTOS",
1613
+ "whiteSpace" : false
1614
+ }, {
1615
+ "id" : "s1-t120",
1616
+ "begin" : 1721,
1617
+ "end" : 1723,
1618
+ "text" : "DE",
1619
+ "whiteSpace" : false
1620
+ }, {
1621
+ "id" : "s1-t121",
1622
+ "begin" : 1724,
1623
+ "end" : 1732,
1624
+ "text" : "REGISTRO",
1625
+ "whiteSpace" : false
1626
+ }, {
1627
+ "id" : "s1-t122",
1628
+ "begin" : 1732,
1629
+ "end" : 1733,
1630
+ "text" : "-",
1631
+ "whiteSpace" : false
1632
+ }, {
1633
+ "id" : "s1-t123",
1634
+ "begin" : 1733,
1635
+ "end" : 1737,
1636
+ "text" : "AQUI",
1637
+ "whiteSpace" : false
1638
+ }, {
1639
+ "id" : "s1-t124",
1640
+ "begin" : 1737,
1641
+ "end" : 1738,
1642
+ "text" : "-",
1643
+ "whiteSpace" : false
1644
+ }, {
1645
+ "id" : "s1-t125",
1646
+ "begin" : 1738,
1647
+ "end" : 1750,
1648
+ "text" : "CERTIFICADOS",
1649
+ "whiteSpace" : false
1650
+ }, {
1651
+ "id" : "s1-t126",
1652
+ "begin" : 1750,
1653
+ "end" : 1751,
1654
+ "text" : "-",
1655
+ "whiteSpace" : false
1656
+ }, {
1657
+ "id" : "s1-t127",
1658
+ "begin" : 1751,
1659
+ "end" : 1752,
1660
+ "text" : "'",
1661
+ "whiteSpace" : false
1662
+ }, {
1663
+ "id" : "s1-t128",
1664
+ "begin" : 1752,
1665
+ "end" : 1758,
1666
+ "text" : "QUEDAN",
1667
+ "whiteSpace" : false
1668
+ }, {
1669
+ "id" : "s1-t129",
1670
+ "begin" : 1759,
1671
+ "end" : 1761,
1672
+ "text" : "EN",
1673
+ "whiteSpace" : false
1674
+ }, {
1675
+ "id" : "s1-t130",
1676
+ "begin" : 1761,
1677
+ "end" : 1762,
1678
+ "text" : "-",
1679
+ "whiteSpace" : false
1680
+ }, {
1681
+ "id" : "s1-t131",
1682
+ "begin" : 1762,
1683
+ "end" : 1767,
1684
+ "text" : "FIRME",
1685
+ "whiteSpace" : false
1686
+ }, {
1687
+ "id" : "s1-t132",
1688
+ "begin" : 1767,
1689
+ "end" : 1768,
1690
+ "text" : ",",
1691
+ "whiteSpace" : false
1692
+ }, {
1693
+ "id" : "s1-t133",
1694
+ "begin" : 1769,
1695
+ "end" : 1776,
1696
+ "text" : "CINCO-H",
1697
+ "whiteSpace" : false
1698
+ }, {
1699
+ "id" : "s1-t134",
1700
+ "begin" : 1777,
1701
+ "end" : 1781,
1702
+ "text" : "DIAS",
1703
+ "whiteSpace" : false
1704
+ }, {
1705
+ "id" : "s1-t135",
1706
+ "begin" : 1781,
1707
+ "end" : 1782,
1708
+ "text" : "-",
1709
+ "whiteSpace" : false
1710
+ }, {
1711
+ "id" : "s1-t136",
1712
+ "begin" : 1782,
1713
+ "end" : 1786,
1714
+ "text" : "HABI",
1715
+ "whiteSpace" : false
1716
+ }, {
1717
+ "id" : "s1-t137",
1718
+ "begin" : 1787,
1719
+ "end" : 1794,
1720
+ "text" : "DESPUES",
1721
+ "whiteSpace" : false
1722
+ }, {
1723
+ "id" : "s1-t138",
1724
+ "begin" : 1795,
1725
+ "end" : 1797,
1726
+ "text" : "DE",
1727
+ "whiteSpace" : false
1728
+ }, {
1729
+ "id" : "s1-t139",
1730
+ "begin" : 1798,
1731
+ "end" : 1800,
1732
+ "text" : "LA",
1733
+ "whiteSpace" : false
1734
+ }, {
1735
+ "id" : "s1-t140",
1736
+ "begin" : 1801,
1737
+ "end" : 1806,
1738
+ "text" : "FECHA",
1739
+ "whiteSpace" : false
1740
+ }, {
1741
+ "id" : "s1-t141",
1742
+ "begin" : 1807,
1743
+ "end" : 1809,
1744
+ "text" : "DE",
1745
+ "whiteSpace" : false
1746
+ }, {
1747
+ "id" : "s1-t142",
1748
+ "begin" : 1810,
1749
+ "end" : 1821,
1750
+ "text" : "INSCRIPCION",
1751
+ "whiteSpace" : false
1752
+ }, {
1753
+ "id" : "s1-t143",
1754
+ "begin" : 1821,
1755
+ "end" : 1822,
1756
+ "text" : ",",
1757
+ "whiteSpace" : false
1758
+ } ]
1759
+ }, {
1760
+ "id" : "s2",
1761
+ "begin" : 1823,
1762
+ "end" : 2272,
1763
+ "text" : "SIEMPRE QUE NO SEAN OBJETO DE RECURSOS EN LA VIA GUBERNATĪVA **EL PRESENTE CERTIFICADO NO CONSTITUYE PERMISO DE FUNCIONAMIENTO EN NINGUN CASO SEÑOR EMPRESARIO, SI SU EMPRESA TIENE ACTIVOS INFERIORES A 30.000 SMLMV Y UNA PLANTA DE PERSONAL DE MENOS DE 200 TRABAJADORES, USTED TIENE DERECHO A RECIBIR UN DESCUENTO EN EL PAGO DE LOS PARAFISCALES DE 75% EN EL PRIMER AÑO DE CONSTITUCION DE SU EMPRESA, DE 50% EN EL SEGUNDO AÑO Y DE 25% EN EL TERCER AÑO.",
1764
+ "tokens" : [ {
1765
+ "id" : "s2-t0",
1766
+ "begin" : 1823,
1767
+ "end" : 1830,
1768
+ "text" : "SIEMPRE",
1769
+ "whiteSpace" : false
1770
+ }, {
1771
+ "id" : "s2-t1",
1772
+ "begin" : 1831,
1773
+ "end" : 1834,
1774
+ "text" : "QUE",
1775
+ "whiteSpace" : false
1776
+ }, {
1777
+ "id" : "s2-t2",
1778
+ "begin" : 1835,
1779
+ "end" : 1837,
1780
+ "text" : "NO",
1781
+ "whiteSpace" : false
1782
+ }, {
1783
+ "id" : "s2-t3",
1784
+ "begin" : 1838,
1785
+ "end" : 1842,
1786
+ "text" : "SEAN",
1787
+ "whiteSpace" : false
1788
+ }, {
1789
+ "id" : "s2-t4",
1790
+ "begin" : 1843,
1791
+ "end" : 1849,
1792
+ "text" : "OBJETO",
1793
+ "whiteSpace" : false
1794
+ }, {
1795
+ "id" : "s2-t5",
1796
+ "begin" : 1850,
1797
+ "end" : 1852,
1798
+ "text" : "DE",
1799
+ "whiteSpace" : false
1800
+ }, {
1801
+ "id" : "s2-t6",
1802
+ "begin" : 1853,
1803
+ "end" : 1861,
1804
+ "text" : "RECURSOS",
1805
+ "whiteSpace" : false
1806
+ }, {
1807
+ "id" : "s2-t7",
1808
+ "begin" : 1862,
1809
+ "end" : 1864,
1810
+ "text" : "EN",
1811
+ "whiteSpace" : false
1812
+ }, {
1813
+ "id" : "s2-t8",
1814
+ "begin" : 1865,
1815
+ "end" : 1867,
1816
+ "text" : "LA",
1817
+ "whiteSpace" : false
1818
+ }, {
1819
+ "id" : "s2-t9",
1820
+ "begin" : 1868,
1821
+ "end" : 1871,
1822
+ "text" : "VIA",
1823
+ "whiteSpace" : false
1824
+ }, {
1825
+ "id" : "s2-t10",
1826
+ "begin" : 1872,
1827
+ "end" : 1883,
1828
+ "text" : "GUBERNATĪVA",
1829
+ "whiteSpace" : false
1830
+ }, {
1831
+ "id" : "s2-t11",
1832
+ "begin" : 1884,
1833
+ "end" : 1886,
1834
+ "text" : "**",
1835
+ "whiteSpace" : false
1836
+ }, {
1837
+ "id" : "s2-t12",
1838
+ "begin" : 1886,
1839
+ "end" : 1888,
1840
+ "text" : "EL",
1841
+ "whiteSpace" : false
1842
+ }, {
1843
+ "id" : "s2-t13",
1844
+ "begin" : 1889,
1845
+ "end" : 1897,
1846
+ "text" : "PRESENTE",
1847
+ "whiteSpace" : false
1848
+ }, {
1849
+ "id" : "s2-t14",
1850
+ "begin" : 1898,
1851
+ "end" : 1909,
1852
+ "text" : "CERTIFICADO",
1853
+ "whiteSpace" : false
1854
+ }, {
1855
+ "id" : "s2-t15",
1856
+ "begin" : 1910,
1857
+ "end" : 1912,
1858
+ "text" : "NO",
1859
+ "whiteSpace" : false
1860
+ }, {
1861
+ "id" : "s2-t16",
1862
+ "begin" : 1913,
1863
+ "end" : 1923,
1864
+ "text" : "CONSTITUYE",
1865
+ "whiteSpace" : false
1866
+ }, {
1867
+ "id" : "s2-t17",
1868
+ "begin" : 1924,
1869
+ "end" : 1931,
1870
+ "text" : "PERMISO",
1871
+ "whiteSpace" : false
1872
+ }, {
1873
+ "id" : "s2-t18",
1874
+ "begin" : 1932,
1875
+ "end" : 1934,
1876
+ "text" : "DE",
1877
+ "whiteSpace" : false
1878
+ }, {
1879
+ "id" : "s2-t19",
1880
+ "begin" : 1935,
1881
+ "end" : 1949,
1882
+ "text" : "FUNCIONAMIENTO",
1883
+ "whiteSpace" : false
1884
+ }, {
1885
+ "id" : "s2-t20",
1886
+ "begin" : 1950,
1887
+ "end" : 1952,
1888
+ "text" : "EN",
1889
+ "whiteSpace" : false
1890
+ }, {
1891
+ "id" : "s2-t21",
1892
+ "begin" : 1953,
1893
+ "end" : 1959,
1894
+ "text" : "NINGUN",
1895
+ "whiteSpace" : false
1896
+ }, {
1897
+ "id" : "s2-t22",
1898
+ "begin" : 1960,
1899
+ "end" : 1964,
1900
+ "text" : "CASO",
1901
+ "whiteSpace" : false
1902
+ }, {
1903
+ "id" : "s2-t23",
1904
+ "begin" : 1965,
1905
+ "end" : 1970,
1906
+ "text" : "SEÑOR",
1907
+ "whiteSpace" : false
1908
+ }, {
1909
+ "id" : "s2-t24",
1910
+ "begin" : 1971,
1911
+ "end" : 1981,
1912
+ "text" : "EMPRESARIO",
1913
+ "whiteSpace" : false
1914
+ }, {
1915
+ "id" : "s2-t25",
1916
+ "begin" : 1981,
1917
+ "end" : 1982,
1918
+ "text" : ",",
1919
+ "whiteSpace" : false
1920
+ }, {
1921
+ "id" : "s2-t26",
1922
+ "begin" : 1983,
1923
+ "end" : 1985,
1924
+ "text" : "SI",
1925
+ "whiteSpace" : false
1926
+ }, {
1927
+ "id" : "s2-t27",
1928
+ "begin" : 1986,
1929
+ "end" : 1988,
1930
+ "text" : "SU",
1931
+ "whiteSpace" : false
1932
+ }, {
1933
+ "id" : "s2-t28",
1934
+ "begin" : 1989,
1935
+ "end" : 1996,
1936
+ "text" : "EMPRESA",
1937
+ "whiteSpace" : false
1938
+ }, {
1939
+ "id" : "s2-t29",
1940
+ "begin" : 1997,
1941
+ "end" : 2002,
1942
+ "text" : "TIENE",
1943
+ "whiteSpace" : false
1944
+ }, {
1945
+ "id" : "s2-t30",
1946
+ "begin" : 2003,
1947
+ "end" : 2010,
1948
+ "text" : "ACTIVOS",
1949
+ "whiteSpace" : false
1950
+ }, {
1951
+ "id" : "s2-t31",
1952
+ "begin" : 2011,
1953
+ "end" : 2021,
1954
+ "text" : "INFERIORES",
1955
+ "whiteSpace" : false
1956
+ }, {
1957
+ "id" : "s2-t32",
1958
+ "begin" : 2022,
1959
+ "end" : 2023,
1960
+ "text" : "A",
1961
+ "whiteSpace" : false
1962
+ }, {
1963
+ "id" : "s2-t33",
1964
+ "begin" : 2024,
1965
+ "end" : 2030,
1966
+ "text" : "30.000",
1967
+ "whiteSpace" : false
1968
+ }, {
1969
+ "id" : "s2-t34",
1970
+ "begin" : 2031,
1971
+ "end" : 2036,
1972
+ "text" : "SMLMV",
1973
+ "whiteSpace" : false
1974
+ }, {
1975
+ "id" : "s2-t35",
1976
+ "begin" : 2037,
1977
+ "end" : 2038,
1978
+ "text" : "Y",
1979
+ "whiteSpace" : false
1980
+ }, {
1981
+ "id" : "s2-t36",
1982
+ "begin" : 2039,
1983
+ "end" : 2042,
1984
+ "text" : "UNA",
1985
+ "whiteSpace" : false
1986
+ }, {
1987
+ "id" : "s2-t37",
1988
+ "begin" : 2043,
1989
+ "end" : 2049,
1990
+ "text" : "PLANTA",
1991
+ "whiteSpace" : false
1992
+ }, {
1993
+ "id" : "s2-t38",
1994
+ "begin" : 2050,
1995
+ "end" : 2052,
1996
+ "text" : "DE",
1997
+ "whiteSpace" : false
1998
+ }, {
1999
+ "id" : "s2-t39",
2000
+ "begin" : 2053,
2001
+ "end" : 2061,
2002
+ "text" : "PERSONAL",
2003
+ "whiteSpace" : false
2004
+ }, {
2005
+ "id" : "s2-t40",
2006
+ "begin" : 2062,
2007
+ "end" : 2064,
2008
+ "text" : "DE",
2009
+ "whiteSpace" : false
2010
+ }, {
2011
+ "id" : "s2-t41",
2012
+ "begin" : 2065,
2013
+ "end" : 2070,
2014
+ "text" : "MENOS",
2015
+ "whiteSpace" : false
2016
+ }, {
2017
+ "id" : "s2-t42",
2018
+ "begin" : 2071,
2019
+ "end" : 2073,
2020
+ "text" : "DE",
2021
+ "whiteSpace" : false
2022
+ }, {
2023
+ "id" : "s2-t43",
2024
+ "begin" : 2074,
2025
+ "end" : 2077,
2026
+ "text" : "200",
2027
+ "whiteSpace" : false
2028
+ }, {
2029
+ "id" : "s2-t44",
2030
+ "begin" : 2078,
2031
+ "end" : 2090,
2032
+ "text" : "TRABAJADORES",
2033
+ "whiteSpace" : false
2034
+ }, {
2035
+ "id" : "s2-t45",
2036
+ "begin" : 2090,
2037
+ "end" : 2091,
2038
+ "text" : ",",
2039
+ "whiteSpace" : false
2040
+ }, {
2041
+ "id" : "s2-t46",
2042
+ "begin" : 2092,
2043
+ "end" : 2097,
2044
+ "text" : "USTED",
2045
+ "whiteSpace" : false
2046
+ }, {
2047
+ "id" : "s2-t47",
2048
+ "begin" : 2098,
2049
+ "end" : 2103,
2050
+ "text" : "TIENE",
2051
+ "whiteSpace" : false
2052
+ }, {
2053
+ "id" : "s2-t48",
2054
+ "begin" : 2104,
2055
+ "end" : 2111,
2056
+ "text" : "DERECHO",
2057
+ "whiteSpace" : false
2058
+ }, {
2059
+ "id" : "s2-t49",
2060
+ "begin" : 2112,
2061
+ "end" : 2113,
2062
+ "text" : "A",
2063
+ "whiteSpace" : false
2064
+ }, {
2065
+ "id" : "s2-t50",
2066
+ "begin" : 2114,
2067
+ "end" : 2121,
2068
+ "text" : "RECIBIR",
2069
+ "whiteSpace" : false
2070
+ }, {
2071
+ "id" : "s2-t51",
2072
+ "begin" : 2122,
2073
+ "end" : 2124,
2074
+ "text" : "UN",
2075
+ "whiteSpace" : false
2076
+ }, {
2077
+ "id" : "s2-t52",
2078
+ "begin" : 2125,
2079
+ "end" : 2134,
2080
+ "text" : "DESCUENTO",
2081
+ "whiteSpace" : false
2082
+ }, {
2083
+ "id" : "s2-t53",
2084
+ "begin" : 2135,
2085
+ "end" : 2137,
2086
+ "text" : "EN",
2087
+ "whiteSpace" : false
2088
+ }, {
2089
+ "id" : "s2-t54",
2090
+ "begin" : 2138,
2091
+ "end" : 2140,
2092
+ "text" : "EL",
2093
+ "whiteSpace" : false
2094
+ }, {
2095
+ "id" : "s2-t55",
2096
+ "begin" : 2141,
2097
+ "end" : 2145,
2098
+ "text" : "PAGO",
2099
+ "whiteSpace" : false
2100
+ }, {
2101
+ "id" : "s2-t56",
2102
+ "begin" : 2146,
2103
+ "end" : 2148,
2104
+ "text" : "DE",
2105
+ "whiteSpace" : false
2106
+ }, {
2107
+ "id" : "s2-t57",
2108
+ "begin" : 2149,
2109
+ "end" : 2152,
2110
+ "text" : "LOS",
2111
+ "whiteSpace" : false
2112
+ }, {
2113
+ "id" : "s2-t58",
2114
+ "begin" : 2153,
2115
+ "end" : 2165,
2116
+ "text" : "PARAFISCALES",
2117
+ "whiteSpace" : false
2118
+ }, {
2119
+ "id" : "s2-t59",
2120
+ "begin" : 2166,
2121
+ "end" : 2168,
2122
+ "text" : "DE",
2123
+ "whiteSpace" : false
2124
+ }, {
2125
+ "id" : "s2-t60",
2126
+ "begin" : 2169,
2127
+ "end" : 2171,
2128
+ "text" : "75",
2129
+ "whiteSpace" : false
2130
+ }, {
2131
+ "id" : "s2-t61",
2132
+ "begin" : 2171,
2133
+ "end" : 2172,
2134
+ "text" : "%",
2135
+ "whiteSpace" : false
2136
+ }, {
2137
+ "id" : "s2-t62",
2138
+ "begin" : 2173,
2139
+ "end" : 2175,
2140
+ "text" : "EN",
2141
+ "whiteSpace" : false
2142
+ }, {
2143
+ "id" : "s2-t63",
2144
+ "begin" : 2176,
2145
+ "end" : 2178,
2146
+ "text" : "EL",
2147
+ "whiteSpace" : false
2148
+ }, {
2149
+ "id" : "s2-t64",
2150
+ "begin" : 2179,
2151
+ "end" : 2185,
2152
+ "text" : "PRIMER",
2153
+ "whiteSpace" : false
2154
+ }, {
2155
+ "id" : "s2-t65",
2156
+ "begin" : 2186,
2157
+ "end" : 2189,
2158
+ "text" : "AÑO",
2159
+ "whiteSpace" : false
2160
+ }, {
2161
+ "id" : "s2-t66",
2162
+ "begin" : 2190,
2163
+ "end" : 2192,
2164
+ "text" : "DE",
2165
+ "whiteSpace" : false
2166
+ }, {
2167
+ "id" : "s2-t67",
2168
+ "begin" : 2193,
2169
+ "end" : 2205,
2170
+ "text" : "CONSTITUCION",
2171
+ "whiteSpace" : false
2172
+ }, {
2173
+ "id" : "s2-t68",
2174
+ "begin" : 2206,
2175
+ "end" : 2208,
2176
+ "text" : "DE",
2177
+ "whiteSpace" : false
2178
+ }, {
2179
+ "id" : "s2-t69",
2180
+ "begin" : 2209,
2181
+ "end" : 2211,
2182
+ "text" : "SU",
2183
+ "whiteSpace" : false
2184
+ }, {
2185
+ "id" : "s2-t70",
2186
+ "begin" : 2212,
2187
+ "end" : 2219,
2188
+ "text" : "EMPRESA",
2189
+ "whiteSpace" : false
2190
+ }, {
2191
+ "id" : "s2-t71",
2192
+ "begin" : 2219,
2193
+ "end" : 2220,
2194
+ "text" : ",",
2195
+ "whiteSpace" : false
2196
+ }, {
2197
+ "id" : "s2-t72",
2198
+ "begin" : 2221,
2199
+ "end" : 2223,
2200
+ "text" : "DE",
2201
+ "whiteSpace" : false
2202
+ }, {
2203
+ "id" : "s2-t73",
2204
+ "begin" : 2224,
2205
+ "end" : 2226,
2206
+ "text" : "50",
2207
+ "whiteSpace" : false
2208
+ }, {
2209
+ "id" : "s2-t74",
2210
+ "begin" : 2226,
2211
+ "end" : 2227,
2212
+ "text" : "%",
2213
+ "whiteSpace" : false
2214
+ }, {
2215
+ "id" : "s2-t75",
2216
+ "begin" : 2228,
2217
+ "end" : 2230,
2218
+ "text" : "EN",
2219
+ "whiteSpace" : false
2220
+ }, {
2221
+ "id" : "s2-t76",
2222
+ "begin" : 2231,
2223
+ "end" : 2233,
2224
+ "text" : "EL",
2225
+ "whiteSpace" : false
2226
+ }, {
2227
+ "id" : "s2-t77",
2228
+ "begin" : 2234,
2229
+ "end" : 2241,
2230
+ "text" : "SEGUNDO",
2231
+ "whiteSpace" : false
2232
+ }, {
2233
+ "id" : "s2-t78",
2234
+ "begin" : 2242,
2235
+ "end" : 2245,
2236
+ "text" : "AÑO",
2237
+ "whiteSpace" : false
2238
+ }, {
2239
+ "id" : "s2-t79",
2240
+ "begin" : 2246,
2241
+ "end" : 2247,
2242
+ "text" : "Y",
2243
+ "whiteSpace" : false
2244
+ }, {
2245
+ "id" : "s2-t80",
2246
+ "begin" : 2248,
2247
+ "end" : 2250,
2248
+ "text" : "DE",
2249
+ "whiteSpace" : false
2250
+ }, {
2251
+ "id" : "s2-t81",
2252
+ "begin" : 2251,
2253
+ "end" : 2253,
2254
+ "text" : "25",
2255
+ "whiteSpace" : false
2256
+ }, {
2257
+ "id" : "s2-t82",
2258
+ "begin" : 2253,
2259
+ "end" : 2254,
2260
+ "text" : "%",
2261
+ "whiteSpace" : false
2262
+ }, {
2263
+ "id" : "s2-t83",
2264
+ "begin" : 2255,
2265
+ "end" : 2257,
2266
+ "text" : "EN",
2267
+ "whiteSpace" : false
2268
+ }, {
2269
+ "id" : "s2-t84",
2270
+ "begin" : 2258,
2271
+ "end" : 2260,
2272
+ "text" : "EL",
2273
+ "whiteSpace" : false
2274
+ }, {
2275
+ "id" : "s2-t85",
2276
+ "begin" : 2261,
2277
+ "end" : 2267,
2278
+ "text" : "TERCER",
2279
+ "whiteSpace" : false
2280
+ }, {
2281
+ "id" : "s2-t86",
2282
+ "begin" : 2268,
2283
+ "end" : 2271,
2284
+ "text" : "AÑO",
2285
+ "whiteSpace" : false
2286
+ }, {
2287
+ "id" : "s2-t87",
2288
+ "begin" : 2271,
2289
+ "end" : 2272,
2290
+ "text" : ".",
2291
+ "whiteSpace" : false
2292
+ } ]
2293
+ }, {
2294
+ "id" : "s3",
2295
+ "begin" : 2273,
2296
+ "end" : 2311,
2297
+ "text" : "LEY 590 DE 2000 Y DECRETO 525 DE 2009.",
2298
+ "tokens" : [ {
2299
+ "id" : "s3-t0",
2300
+ "begin" : 2273,
2301
+ "end" : 2276,
2302
+ "text" : "LEY",
2303
+ "whiteSpace" : false
2304
+ }, {
2305
+ "id" : "s3-t1",
2306
+ "begin" : 2277,
2307
+ "end" : 2280,
2308
+ "text" : "590",
2309
+ "whiteSpace" : false
2310
+ }, {
2311
+ "id" : "s3-t2",
2312
+ "begin" : 2281,
2313
+ "end" : 2283,
2314
+ "text" : "DE",
2315
+ "whiteSpace" : false
2316
+ }, {
2317
+ "id" : "s3-t3",
2318
+ "begin" : 2284,
2319
+ "end" : 2288,
2320
+ "text" : "2000",
2321
+ "whiteSpace" : false
2322
+ }, {
2323
+ "id" : "s3-t4",
2324
+ "begin" : 2289,
2325
+ "end" : 2290,
2326
+ "text" : "Y",
2327
+ "whiteSpace" : false
2328
+ }, {
2329
+ "id" : "s3-t5",
2330
+ "begin" : 2291,
2331
+ "end" : 2298,
2332
+ "text" : "DECRETO",
2333
+ "whiteSpace" : false
2334
+ }, {
2335
+ "id" : "s3-t6",
2336
+ "begin" : 2299,
2337
+ "end" : 2302,
2338
+ "text" : "525",
2339
+ "whiteSpace" : false
2340
+ }, {
2341
+ "id" : "s3-t7",
2342
+ "begin" : 2303,
2343
+ "end" : 2305,
2344
+ "text" : "DE",
2345
+ "whiteSpace" : false
2346
+ }, {
2347
+ "id" : "s3-t8",
2348
+ "begin" : 2306,
2349
+ "end" : 2310,
2350
+ "text" : "2009",
2351
+ "whiteSpace" : false
2352
+ }, {
2353
+ "id" : "s3-t9",
2354
+ "begin" : 2310,
2355
+ "end" : 2311,
2356
+ "text" : ".",
2357
+ "whiteSpace" : false
2358
+ } ]
2359
+ }, {
2360
+ "id" : "s4",
2361
+ "begin" : 2312,
2362
+ "end" : 2631,
2363
+ "text" : "EL SECRETARIO DE LA CAMARA DE COMERCIO, VALOR $ 1,900 DE CONFORMIDAD CON EL DECRETO 2150 DE 1995 Y LA AUTORIZACION IMPARTIDA POR LA SUPERINTENDENCIA DE INDUSTRIA Y COMERCIO, MEDIANTE EL OFICIO DEL 18 DE NO IEMBRE DE 1996, LA FIRMA MECANICA QUE APARECE A CONTINUACION TIENE PLENA VALIDEZ PARA TODOS LOS EFECTOS LEGALES \"",
2364
+ "tokens" : [ {
2365
+ "id" : "s4-t0",
2366
+ "begin" : 2312,
2367
+ "end" : 2314,
2368
+ "text" : "EL",
2369
+ "whiteSpace" : false
2370
+ }, {
2371
+ "id" : "s4-t1",
2372
+ "begin" : 2315,
2373
+ "end" : 2325,
2374
+ "text" : "SECRETARIO",
2375
+ "whiteSpace" : false
2376
+ }, {
2377
+ "id" : "s4-t2",
2378
+ "begin" : 2326,
2379
+ "end" : 2328,
2380
+ "text" : "DE",
2381
+ "whiteSpace" : false
2382
+ }, {
2383
+ "id" : "s4-t3",
2384
+ "begin" : 2329,
2385
+ "end" : 2331,
2386
+ "text" : "LA",
2387
+ "whiteSpace" : false
2388
+ }, {
2389
+ "id" : "s4-t4",
2390
+ "begin" : 2332,
2391
+ "end" : 2338,
2392
+ "text" : "CAMARA",
2393
+ "whiteSpace" : false
2394
+ }, {
2395
+ "id" : "s4-t5",
2396
+ "begin" : 2339,
2397
+ "end" : 2341,
2398
+ "text" : "DE",
2399
+ "whiteSpace" : false
2400
+ }, {
2401
+ "id" : "s4-t6",
2402
+ "begin" : 2342,
2403
+ "end" : 2350,
2404
+ "text" : "COMERCIO",
2405
+ "whiteSpace" : false
2406
+ }, {
2407
+ "id" : "s4-t7",
2408
+ "begin" : 2350,
2409
+ "end" : 2351,
2410
+ "text" : ",",
2411
+ "whiteSpace" : false
2412
+ }, {
2413
+ "id" : "s4-t8",
2414
+ "begin" : 2352,
2415
+ "end" : 2357,
2416
+ "text" : "VALOR",
2417
+ "whiteSpace" : false
2418
+ }, {
2419
+ "id" : "s4-t9",
2420
+ "begin" : 2358,
2421
+ "end" : 2359,
2422
+ "text" : "$",
2423
+ "whiteSpace" : false
2424
+ }, {
2425
+ "id" : "s4-t10",
2426
+ "begin" : 2360,
2427
+ "end" : 2365,
2428
+ "text" : "1,900",
2429
+ "whiteSpace" : false
2430
+ }, {
2431
+ "id" : "s4-t11",
2432
+ "begin" : 2366,
2433
+ "end" : 2368,
2434
+ "text" : "DE",
2435
+ "whiteSpace" : false
2436
+ }, {
2437
+ "id" : "s4-t12",
2438
+ "begin" : 2369,
2439
+ "end" : 2380,
2440
+ "text" : "CONFORMIDAD",
2441
+ "whiteSpace" : false
2442
+ }, {
2443
+ "id" : "s4-t13",
2444
+ "begin" : 2381,
2445
+ "end" : 2384,
2446
+ "text" : "CON",
2447
+ "whiteSpace" : false
2448
+ }, {
2449
+ "id" : "s4-t14",
2450
+ "begin" : 2385,
2451
+ "end" : 2387,
2452
+ "text" : "EL",
2453
+ "whiteSpace" : false
2454
+ }, {
2455
+ "id" : "s4-t15",
2456
+ "begin" : 2388,
2457
+ "end" : 2395,
2458
+ "text" : "DECRETO",
2459
+ "whiteSpace" : false
2460
+ }, {
2461
+ "id" : "s4-t16",
2462
+ "begin" : 2396,
2463
+ "end" : 2400,
2464
+ "text" : "2150",
2465
+ "whiteSpace" : false
2466
+ }, {
2467
+ "id" : "s4-t17",
2468
+ "begin" : 2401,
2469
+ "end" : 2403,
2470
+ "text" : "DE",
2471
+ "whiteSpace" : false
2472
+ }, {
2473
+ "id" : "s4-t18",
2474
+ "begin" : 2404,
2475
+ "end" : 2408,
2476
+ "text" : "1995",
2477
+ "whiteSpace" : false
2478
+ }, {
2479
+ "id" : "s4-t19",
2480
+ "begin" : 2409,
2481
+ "end" : 2410,
2482
+ "text" : "Y",
2483
+ "whiteSpace" : false
2484
+ }, {
2485
+ "id" : "s4-t20",
2486
+ "begin" : 2411,
2487
+ "end" : 2413,
2488
+ "text" : "LA",
2489
+ "whiteSpace" : false
2490
+ }, {
2491
+ "id" : "s4-t21",
2492
+ "begin" : 2414,
2493
+ "end" : 2426,
2494
+ "text" : "AUTORIZACION",
2495
+ "whiteSpace" : false
2496
+ }, {
2497
+ "id" : "s4-t22",
2498
+ "begin" : 2427,
2499
+ "end" : 2436,
2500
+ "text" : "IMPARTIDA",
2501
+ "whiteSpace" : false
2502
+ }, {
2503
+ "id" : "s4-t23",
2504
+ "begin" : 2437,
2505
+ "end" : 2440,
2506
+ "text" : "POR",
2507
+ "whiteSpace" : false
2508
+ }, {
2509
+ "id" : "s4-t24",
2510
+ "begin" : 2441,
2511
+ "end" : 2443,
2512
+ "text" : "LA",
2513
+ "whiteSpace" : false
2514
+ }, {
2515
+ "id" : "s4-t25",
2516
+ "begin" : 2444,
2517
+ "end" : 2460,
2518
+ "text" : "SUPERINTENDENCIA",
2519
+ "whiteSpace" : false
2520
+ }, {
2521
+ "id" : "s4-t26",
2522
+ "begin" : 2461,
2523
+ "end" : 2463,
2524
+ "text" : "DE",
2525
+ "whiteSpace" : false
2526
+ }, {
2527
+ "id" : "s4-t27",
2528
+ "begin" : 2464,
2529
+ "end" : 2473,
2530
+ "text" : "INDUSTRIA",
2531
+ "whiteSpace" : false
2532
+ }, {
2533
+ "id" : "s4-t28",
2534
+ "begin" : 2474,
2535
+ "end" : 2475,
2536
+ "text" : "Y",
2537
+ "whiteSpace" : false
2538
+ }, {
2539
+ "id" : "s4-t29",
2540
+ "begin" : 2476,
2541
+ "end" : 2484,
2542
+ "text" : "COMERCIO",
2543
+ "whiteSpace" : false
2544
+ }, {
2545
+ "id" : "s4-t30",
2546
+ "begin" : 2484,
2547
+ "end" : 2485,
2548
+ "text" : ",",
2549
+ "whiteSpace" : false
2550
+ }, {
2551
+ "id" : "s4-t31",
2552
+ "begin" : 2486,
2553
+ "end" : 2494,
2554
+ "text" : "MEDIANTE",
2555
+ "whiteSpace" : false
2556
+ }, {
2557
+ "id" : "s4-t32",
2558
+ "begin" : 2495,
2559
+ "end" : 2497,
2560
+ "text" : "EL",
2561
+ "whiteSpace" : false
2562
+ }, {
2563
+ "id" : "s4-t33",
2564
+ "begin" : 2498,
2565
+ "end" : 2504,
2566
+ "text" : "OFICIO",
2567
+ "whiteSpace" : false
2568
+ }, {
2569
+ "id" : "s4-t34",
2570
+ "begin" : 2505,
2571
+ "end" : 2508,
2572
+ "text" : "DEL",
2573
+ "whiteSpace" : false
2574
+ }, {
2575
+ "id" : "s4-t35",
2576
+ "begin" : 2509,
2577
+ "end" : 2511,
2578
+ "text" : "18",
2579
+ "whiteSpace" : false
2580
+ }, {
2581
+ "id" : "s4-t36",
2582
+ "begin" : 2512,
2583
+ "end" : 2514,
2584
+ "text" : "DE",
2585
+ "whiteSpace" : false
2586
+ }, {
2587
+ "id" : "s4-t37",
2588
+ "begin" : 2515,
2589
+ "end" : 2517,
2590
+ "text" : "NO",
2591
+ "whiteSpace" : false
2592
+ }, {
2593
+ "id" : "s4-t38",
2594
+ "begin" : 2518,
2595
+ "end" : 2524,
2596
+ "text" : "IEMBRE",
2597
+ "whiteSpace" : false
2598
+ }, {
2599
+ "id" : "s4-t39",
2600
+ "begin" : 2525,
2601
+ "end" : 2527,
2602
+ "text" : "DE",
2603
+ "whiteSpace" : false
2604
+ }, {
2605
+ "id" : "s4-t40",
2606
+ "begin" : 2528,
2607
+ "end" : 2532,
2608
+ "text" : "1996",
2609
+ "whiteSpace" : false
2610
+ }, {
2611
+ "id" : "s4-t41",
2612
+ "begin" : 2532,
2613
+ "end" : 2533,
2614
+ "text" : ",",
2615
+ "whiteSpace" : false
2616
+ }, {
2617
+ "id" : "s4-t42",
2618
+ "begin" : 2534,
2619
+ "end" : 2536,
2620
+ "text" : "LA",
2621
+ "whiteSpace" : false
2622
+ }, {
2623
+ "id" : "s4-t43",
2624
+ "begin" : 2537,
2625
+ "end" : 2542,
2626
+ "text" : "FIRMA",
2627
+ "whiteSpace" : false
2628
+ }, {
2629
+ "id" : "s4-t44",
2630
+ "begin" : 2543,
2631
+ "end" : 2551,
2632
+ "text" : "MECANICA",
2633
+ "whiteSpace" : false
2634
+ }, {
2635
+ "id" : "s4-t45",
2636
+ "begin" : 2552,
2637
+ "end" : 2555,
2638
+ "text" : "QUE",
2639
+ "whiteSpace" : false
2640
+ }, {
2641
+ "id" : "s4-t46",
2642
+ "begin" : 2556,
2643
+ "end" : 2563,
2644
+ "text" : "APARECE",
2645
+ "whiteSpace" : false
2646
+ }, {
2647
+ "id" : "s4-t47",
2648
+ "begin" : 2564,
2649
+ "end" : 2565,
2650
+ "text" : "A",
2651
+ "whiteSpace" : false
2652
+ }, {
2653
+ "id" : "s4-t48",
2654
+ "begin" : 2566,
2655
+ "end" : 2578,
2656
+ "text" : "CONTINUACION",
2657
+ "whiteSpace" : false
2658
+ }, {
2659
+ "id" : "s4-t49",
2660
+ "begin" : 2579,
2661
+ "end" : 2584,
2662
+ "text" : "TIENE",
2663
+ "whiteSpace" : false
2664
+ }, {
2665
+ "id" : "s4-t50",
2666
+ "begin" : 2585,
2667
+ "end" : 2590,
2668
+ "text" : "PLENA",
2669
+ "whiteSpace" : false
2670
+ }, {
2671
+ "id" : "s4-t51",
2672
+ "begin" : 2591,
2673
+ "end" : 2598,
2674
+ "text" : "VALIDEZ",
2675
+ "whiteSpace" : false
2676
+ }, {
2677
+ "id" : "s4-t52",
2678
+ "begin" : 2599,
2679
+ "end" : 2603,
2680
+ "text" : "PARA",
2681
+ "whiteSpace" : false
2682
+ }, {
2683
+ "id" : "s4-t53",
2684
+ "begin" : 2604,
2685
+ "end" : 2609,
2686
+ "text" : "TODOS",
2687
+ "whiteSpace" : false
2688
+ }, {
2689
+ "id" : "s4-t54",
2690
+ "begin" : 2610,
2691
+ "end" : 2613,
2692
+ "text" : "LOS",
2693
+ "whiteSpace" : false
2694
+ }, {
2695
+ "id" : "s4-t55",
2696
+ "begin" : 2614,
2697
+ "end" : 2621,
2698
+ "text" : "EFECTOS",
2699
+ "whiteSpace" : false
2700
+ }, {
2701
+ "id" : "s4-t56",
2702
+ "begin" : 2622,
2703
+ "end" : 2629,
2704
+ "text" : "LEGALES",
2705
+ "whiteSpace" : false
2706
+ }, {
2707
+ "id" : "s4-t57",
2708
+ "begin" : 2630,
2709
+ "end" : 2631,
2710
+ "text" : "\"",
2711
+ "whiteSpace" : false
2712
+ } ]
2713
+ } ],
2714
+ "mentions" : [ {
2715
+ "id" : "s0-m9",
2716
+ "properties" : {
2717
+ "SIRE_MENTION_ROLE" : "OTRA_FECHA",
2718
+ "SIRE_ENTITY_SUBTYPE" : "SUBTIPO_OTRA_FECHA",
2719
+ "SIRE_MENTION_TYPE" : "NONE",
2720
+ "SIRE_MENTION_CLASS" : "SPC"
2721
+ },
2722
+ "type" : "FECHA",
2723
+ "begin" : 48,
2724
+ "end" : 71,
2725
+ "inCoref" : false
2726
+ }, {
2727
+ "id" : "s0-m10",
2728
+ "properties" : {
2729
+ "SIRE_MENTION_ROLE" : "PERSONA_NATURAL",
2730
+ "SIRE_ENTITY_SUBTYPE" : "SUBTIPO_PERSONA_NATURAL",
2731
+ "SIRE_MENTION_TYPE" : "NONE",
2732
+ "SIRE_MENTION_CLASS" : "SPC"
2733
+ },
2734
+ "type" : "PERSONA",
2735
+ "begin" : 268,
2736
+ "end" : 290,
2737
+ "inCoref" : false
2738
+ }, {
2739
+ "id" : "s0-m3",
2740
+ "properties" : {
2741
+ "SIRE_MENTION_ROLE" : "TIPO_DOCUMENTO_IDENTIDAD",
2742
+ "SIRE_ENTITY_SUBTYPE" : "CEDULA_CIUDADANIA",
2743
+ "SIRE_MENTION_TYPE" : "NONE",
2744
+ "SIRE_MENTION_CLASS" : "SPC"
2745
+ },
2746
+ "type" : "TIPO_DOCUMENTO_IDENTIDAD",
2747
+ "begin" : 291,
2748
+ "end" : 294,
2749
+ "inCoref" : false
2750
+ }, {
2751
+ "id" : "s0-m4",
2752
+ "properties" : {
2753
+ "SIRE_MENTION_ROLE" : "DOCUMENTO_IDENTIDAD",
2754
+ "SIRE_ENTITY_SUBTYPE" : "NONE",
2755
+ "SIRE_MENTION_TYPE" : "NONE",
2756
+ "SIRE_MENTION_CLASS" : "SPC"
2757
+ },
2758
+ "type" : "DOCUMENTO_IDENTIDAD",
2759
+ "begin" : 295,
2760
+ "end" : 301,
2761
+ "inCoref" : false
2762
+ }, {
2763
+ "id" : "s0-m5",
2764
+ "properties" : {
2765
+ "SIRE_MENTION_ROLE" : "TIPO_DOCUMENTO_IDENTIDAD",
2766
+ "SIRE_ENTITY_SUBTYPE" : "NIT",
2767
+ "SIRE_MENTION_TYPE" : "NONE",
2768
+ "SIRE_MENTION_CLASS" : "SPC"
2769
+ },
2770
+ "type" : "TIPO_DOCUMENTO_IDENTIDAD",
2771
+ "begin" : 302,
2772
+ "end" : 305,
2773
+ "inCoref" : false
2774
+ }, {
2775
+ "id" : "s0-m6",
2776
+ "properties" : {
2777
+ "SIRE_MENTION_ROLE" : "DOCUMENTO_IDENTIDAD",
2778
+ "SIRE_ENTITY_SUBTYPE" : "NONE",
2779
+ "SIRE_MENTION_TYPE" : "NONE",
2780
+ "SIRE_MENTION_CLASS" : "SPC"
2781
+ },
2782
+ "type" : "DOCUMENTO_IDENTIDAD",
2783
+ "begin" : 307,
2784
+ "end" : 315,
2785
+ "inCoref" : false
2786
+ }, {
2787
+ "id" : "s0-m8",
2788
+ "properties" : {
2789
+ "SIRE_MENTION_ROLE" : "TITULO_FECHA_MATRICULA",
2790
+ "SIRE_ENTITY_SUBTYPE" : "NONE",
2791
+ "SIRE_MENTION_TYPE" : "NONE",
2792
+ "SIRE_MENTION_CLASS" : "SPC"
2793
+ },
2794
+ "type" : "TITULO_FECHA_MATRICULA",
2795
+ "begin" : 327,
2796
+ "end" : 336,
2797
+ "inCoref" : false
2798
+ }, {
2799
+ "id" : "s0-m7",
2800
+ "properties" : {
2801
+ "SIRE_MENTION_ROLE" : "FECHA_MATRICULA",
2802
+ "SIRE_ENTITY_SUBTYPE" : "SUBTIPO_FECHA_MATRICULA",
2803
+ "SIRE_MENTION_TYPE" : "NONE",
2804
+ "SIRE_MENTION_CLASS" : "SPC"
2805
+ },
2806
+ "type" : "FECHA",
2807
+ "begin" : 353,
2808
+ "end" : 372,
2809
+ "inCoref" : false
2810
+ }, {
2811
+ "id" : "s1-m6",
2812
+ "properties" : {
2813
+ "SIRE_MENTION_ROLE" : "OTRA_FECHA",
2814
+ "SIRE_ENTITY_SUBTYPE" : "SUBTIPO_OTRA_FECHA",
2815
+ "SIRE_MENTION_TYPE" : "NONE",
2816
+ "SIRE_MENTION_CLASS" : "SPC"
2817
+ },
2818
+ "type" : "FECHA",
2819
+ "begin" : 962,
2820
+ "end" : 984,
2821
+ "inCoref" : false
2822
+ }, {
2823
+ "id" : "s1-m1",
2824
+ "properties" : {
2825
+ "SIRE_MENTION_ROLE" : "ACTIVIDAD",
2826
+ "SIRE_ENTITY_SUBTYPE" : "NONE",
2827
+ "SIRE_MENTION_TYPE" : "NONE",
2828
+ "SIRE_MENTION_CLASS" : "SPC"
2829
+ },
2830
+ "type" : "ACTIVIDAD",
2831
+ "begin" : 1074,
2832
+ "end" : 1221,
2833
+ "inCoref" : false
2834
+ }, {
2835
+ "id" : "s1-m3",
2836
+ "properties" : {
2837
+ "SIRE_MENTION_ROLE" : "TITULO_NOMBRE_O_RAZON_SOCIAL",
2838
+ "SIRE_ENTITY_SUBTYPE" : "NONE",
2839
+ "SIRE_MENTION_TYPE" : "NONE",
2840
+ "SIRE_MENTION_CLASS" : "SPC"
2841
+ },
2842
+ "type" : "TITULO_NOMBRE_O_RAZON_SOCIAL",
2843
+ "begin" : 1277,
2844
+ "end" : 1312,
2845
+ "inCoref" : false
2846
+ }, {
2847
+ "id" : "s1-m2",
2848
+ "properties" : {
2849
+ "SIRE_MENTION_ROLE" : "EMPRESA",
2850
+ "SIRE_ENTITY_SUBTYPE" : "NONE",
2851
+ "SIRE_MENTION_TYPE" : "NONE",
2852
+ "SIRE_MENTION_CLASS" : "SPC"
2853
+ },
2854
+ "type" : "EMPRESA",
2855
+ "begin" : 1313,
2856
+ "end" : 1343,
2857
+ "inCoref" : false
2858
+ }, {
2859
+ "id" : "s1-m4",
2860
+ "properties" : {
2861
+ "SIRE_MENTION_ROLE" : "TITULO_FECHA_MATRICULA",
2862
+ "SIRE_ENTITY_SUBTYPE" : "NONE",
2863
+ "SIRE_MENTION_TYPE" : "NONE",
2864
+ "SIRE_MENTION_CLASS" : "SPC"
2865
+ },
2866
+ "type" : "TITULO_FECHA_MATRICULA",
2867
+ "begin" : 1410,
2868
+ "end" : 1419,
2869
+ "inCoref" : false
2870
+ }, {
2871
+ "id" : "s1-m5",
2872
+ "properties" : {
2873
+ "SIRE_MENTION_ROLE" : "FECHA_MATRICULA",
2874
+ "SIRE_ENTITY_SUBTYPE" : "SUBTIPO_FECHA_MATRICULA",
2875
+ "SIRE_MENTION_TYPE" : "NONE",
2876
+ "SIRE_MENTION_CLASS" : "SPC"
2877
+ },
2878
+ "type" : "FECHA",
2879
+ "begin" : 1435,
2880
+ "end" : 1454,
2881
+ "inCoref" : false
2882
+ }, {
2883
+ "id" : "s1-m7",
2884
+ "properties" : {
2885
+ "SIRE_MENTION_ROLE" : "OTRA_FECHA",
2886
+ "SIRE_ENTITY_SUBTYPE" : "SUBTIPO_OTRA_FECHA",
2887
+ "SIRE_MENTION_TYPE" : "NONE",
2888
+ "SIRE_MENTION_CLASS" : "SPC"
2889
+ },
2890
+ "type" : "FECHA",
2891
+ "begin" : 1485,
2892
+ "end" : 1504,
2893
+ "inCoref" : false
2894
+ }, {
2895
+ "id" : "s4-m1",
2896
+ "properties" : {
2897
+ "SIRE_MENTION_ROLE" : "OTRA_FECHA",
2898
+ "SIRE_ENTITY_SUBTYPE" : "SUBTIPO_OTRA_FECHA",
2899
+ "SIRE_MENTION_TYPE" : "NONE",
2900
+ "SIRE_MENTION_CLASS" : "SPC"
2901
+ },
2902
+ "type" : "FECHA",
2903
+ "begin" : 2509,
2904
+ "end" : 2532,
2905
+ "inCoref" : false
2906
+ } ],
2907
+ "relations" : [ {
2908
+ "id" : "s0-r3",
2909
+ "properties" : { },
2910
+ "type" : "documento_identidad_tiene_tipo_documento_identidad",
2911
+ "args" : [ "s0-m4", "s0-m3" ]
2912
+ }, {
2913
+ "id" : "s0-r4",
2914
+ "properties" : { },
2915
+ "type" : "documento_identidad_tiene_tipo_documento_identidad",
2916
+ "args" : [ "s0-m6", "s0-m5" ]
2917
+ }, {
2918
+ "id" : "s0-r5",
2919
+ "properties" : { },
2920
+ "type" : "es_titulo_de_fecha_matricula",
2921
+ "args" : [ "s0-m8", "s0-m7" ]
2922
+ }, {
2923
+ "id" : "s0-r6",
2924
+ "properties" : { },
2925
+ "type" : "persona_tiene_tipo_documento_identidad",
2926
+ "args" : [ "s0-m10", "s0-m3" ]
2927
+ }, {
2928
+ "id" : "s0-r7",
2929
+ "properties" : { },
2930
+ "type" : "persona_tiene_documento_identidad",
2931
+ "args" : [ "s0-m10", "s0-m4" ]
2932
+ }, {
2933
+ "id" : "s0-r8",
2934
+ "properties" : { },
2935
+ "type" : "persona_tiene_tipo_documento_identidad",
2936
+ "args" : [ "s0-m10", "s0-m5" ]
2937
+ }, {
2938
+ "id" : "s0-r9",
2939
+ "properties" : { },
2940
+ "type" : "persona_tiene_documento_identidad",
2941
+ "args" : [ "s0-m10", "s0-m6" ]
2942
+ }, {
2943
+ "id" : "s0-r10",
2944
+ "properties" : { },
2945
+ "type" : "documento_identidad_tiene_tipo_documento_identidad",
2946
+ "args" : [ "s0-m4", "s0-m5" ]
2947
+ }, {
2948
+ "id" : "s1-r1",
2949
+ "properties" : { },
2950
+ "type" : "es_titulo_de_empresa",
2951
+ "args" : [ "s1-m3", "s1-m2" ]
2952
+ }, {
2953
+ "id" : "s1-r2",
2954
+ "properties" : { },
2955
+ "type" : "es_titulo_de_fecha_matricula",
2956
+ "args" : [ "s1-m4", "s1-m5" ]
2957
+ } ],
2958
+ "corefs" : [ ],
2959
+ "typeResolved" : true,
2960
+ "userResolved" : false
2961
+ }
fast/RC/rel2id.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Product-Producer": 0, "Cause-Effect": 1, "Content-Container": 2, "Component-Whole": 3, "Other": 4, "Entity-Destination": 5, "Instrument-Agency": 6, "Entity-Origin": 7, "Message-Topic": 8, "Member-Collection": 9}
fast/RC/train.txt ADDED
@@ -0,0 +1,1037 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The O Component-Whole
2
+ system O -
3
+ as O -
4
+ described O -
5
+ above O -
6
+ has O -
7
+ its O -
8
+ greatest O -
9
+ application O -
10
+ in O -
11
+ an O -
12
+ arrayed O -
13
+ configuration Whole -
14
+ of O -
15
+ antenna O -
16
+ elements Component -
17
+ . O -
18
+
19
+ The O Other
20
+ child Orelation1 -
21
+ was O -
22
+ carefully O -
23
+ wrapped O -
24
+ and O -
25
+ bound O -
26
+ into O -
27
+ the O -
28
+ cradle Orelation2 -
29
+ by O -
30
+ means O -
31
+ of O -
32
+ a O -
33
+ cord O -
34
+ . O -
35
+
36
+ The O Instrument-Agency
37
+ author Agency -
38
+ of O -
39
+ a O -
40
+ keygen O -
41
+ uses O -
42
+ a O -
43
+ disassembler Instrument -
44
+ to O -
45
+ look O -
46
+ at O -
47
+ the O -
48
+ raw O -
49
+ assembly O -
50
+ code O -
51
+ . O -
52
+
53
+ A O Other
54
+ misty O -
55
+ ridge Orelation1 -
56
+ uprises O -
57
+ from O -
58
+ the O -
59
+ surge Orelation2 -
60
+ . O -
61
+
62
+ The O Member-Collection
63
+ student Member -
64
+ association Collection -
65
+ is O -
66
+ the O -
67
+ voice O -
68
+ of O -
69
+ the O -
70
+ undergraduate O -
71
+ student O -
72
+ population O -
73
+ of O -
74
+ the O -
75
+ State O -
76
+ University O -
77
+ of O -
78
+ New O -
79
+ York O -
80
+ at O -
81
+ Buffalo O -
82
+ . O -
83
+
84
+ This O Other
85
+ is O -
86
+ the O -
87
+ sprawling O -
88
+ complex Orelation1 -
89
+ that O -
90
+ is O -
91
+ Peru O -
92
+ 's O -
93
+ largest O -
94
+ producer Orelation2 -
95
+ of O -
96
+ silver O -
97
+ . O -
98
+
99
+ The O Cause-Effect
100
+ current O -
101
+ view O -
102
+ is O -
103
+ that O -
104
+ the O -
105
+ chronic O -
106
+ inflammation Effect -
107
+ in O -
108
+ the O -
109
+ distal O -
110
+ part O -
111
+ of O -
112
+ the O -
113
+ stomach O -
114
+ caused O -
115
+ by O -
116
+ Helicobacter O -
117
+ pylori O -
118
+ infection Cause -
119
+ results O -
120
+ in O -
121
+ an O -
122
+ increased O -
123
+ acid O -
124
+ production O -
125
+ from O -
126
+ the O -
127
+ non-infected O -
128
+ upper O -
129
+ corpus O -
130
+ region O -
131
+ of O -
132
+ the O -
133
+ stomach O -
134
+ . O -
135
+
136
+ People Entity -
137
+ have O -
138
+ been O -
139
+ moving O -
140
+ back O -
141
+ into O -
142
+ downtown Destination -
143
+ . O -
144
+
145
+ The O Content-Container
146
+ lawsonite Content -
147
+ was O -
148
+ contained O -
149
+ in O -
150
+ a O -
151
+ platinum Container -
152
+ crucible Container -
153
+ and O -
154
+ the O -
155
+ counter-weight O -
156
+ was O -
157
+ a O -
158
+ plastic O -
159
+ crucible O -
160
+ with O -
161
+ metal O -
162
+ pieces O -
163
+ . O -
164
+
165
+ The O Entity-Destination
166
+ solute O -
167
+ was O -
168
+ placed O -
169
+ inside O -
170
+ a O -
171
+ beaker O -
172
+ and O -
173
+ 5 O -
174
+ mL O -
175
+ of O -
176
+ the O -
177
+ solvent Entity -
178
+ was O -
179
+ pipetted O -
180
+ into O -
181
+ a O -
182
+ 25 O -
183
+ mL O -
184
+ glass O -
185
+ flask Destination -
186
+ for O -
187
+ each O -
188
+ trial O -
189
+ . O -
190
+
191
+ The O Member-Collection
192
+ fifty O -
193
+ essays Member -
194
+ collected O -
195
+ in O -
196
+ this O -
197
+ volume Collection -
198
+ testify O -
199
+ to O -
200
+ most O -
201
+ of O -
202
+ the O -
203
+ prominent O -
204
+ themes O -
205
+ from O -
206
+ Professor O -
207
+ Quispel O -
208
+ 's O -
209
+ scholarly O -
210
+ career O -
211
+ . O -
212
+
213
+ Their O Other
214
+ composer Orelation1 -
215
+ has O -
216
+ sunk O -
217
+ into O -
218
+ oblivion Orelation2 -
219
+ . O -
220
+
221
+ The O Message-Topic
222
+ Pulitzer O -
223
+ Committee O -
224
+ issues O -
225
+ an O -
226
+ official O -
227
+ citation Message -
228
+ explaining O -
229
+ the O -
230
+ reasons Topic -
231
+ for O -
232
+ the O -
233
+ award O -
234
+ . O -
235
+
236
+ The O Cause-Effect
237
+ burst Effect -
238
+ has O -
239
+ been O -
240
+ caused O -
241
+ by O -
242
+ water O -
243
+ hammer O -
244
+ pressure Cause -
245
+ . O -
246
+
247
+ Even O Instrument-Agency
248
+ commercial O -
249
+ networks Agency -
250
+ have O -
251
+ moved O -
252
+ into O -
253
+ high-definition Instrument -
254
+ broadcast Instrument -
255
+ . O -
256
+
257
+ It O Message-Topic
258
+ was O -
259
+ a O -
260
+ friendly O -
261
+ call Message -
262
+ to O -
263
+ remind O -
264
+ them O -
265
+ about O -
266
+ the O -
267
+ bill Topic -
268
+ and O -
269
+ make O -
270
+ sure O -
271
+ they O -
272
+ have O -
273
+ a O -
274
+ copy O -
275
+ of O -
276
+ the O -
277
+ invoice O -
278
+ . O -
279
+
280
+ Texas-born O Instrument-Agency
281
+ virtuoso Agency -
282
+ finds O -
283
+ harmony O -
284
+ , O -
285
+ sophistication O -
286
+ in O -
287
+ Appalachian O -
288
+ instrument Instrument -
289
+ . O -
290
+
291
+ The O Product-Producer
292
+ factory Producer -
293
+ ' O -
294
+ s O -
295
+ products O -
296
+ have O -
297
+ included O -
298
+ flower O -
299
+ pots O -
300
+ , O -
301
+ Finnish O -
302
+ rooster-whistles O -
303
+ , O -
304
+ pans O -
305
+ , O -
306
+ trays Product -
307
+ , O -
308
+ tea O -
309
+ pots O -
310
+ , O -
311
+ ash O -
312
+ trays O -
313
+ and O -
314
+ air O -
315
+ moisturisers O -
316
+ . O -
317
+
318
+ The O Component-Whole
319
+ girl O -
320
+ showed O -
321
+ a O -
322
+ photo O -
323
+ of O -
324
+ apple O -
325
+ tree Whole -
326
+ blossom Component -
327
+ on O -
328
+ a O -
329
+ fruit O -
330
+ tree O -
331
+ in O -
332
+ the O -
333
+ Central O -
334
+ Valley O -
335
+ . O -
336
+
337
+ They O Member-Collection
338
+ tried O -
339
+ an O -
340
+ assault O -
341
+ of O -
342
+ their O -
343
+ own O -
344
+ an O -
345
+ hour O -
346
+ later O -
347
+ , O -
348
+ with O -
349
+ two O -
350
+ columns O -
351
+ of O -
352
+ sixteen O -
353
+ tanks O -
354
+ backed O -
355
+ by O -
356
+ a O -
357
+ battalion Collection -
358
+ of O -
359
+ Panzer O -
360
+ grenadiers Member -
361
+ . O -
362
+
363
+ Their O Entity-Origin
364
+ knowledge Entity -
365
+ of O -
366
+ the O -
367
+ power O -
368
+ and O -
369
+ rank O -
370
+ symbols O -
371
+ of O -
372
+ the O -
373
+ Continental O -
374
+ empires O -
375
+ was O -
376
+ gained O -
377
+ from O -
378
+ the O -
379
+ numerous O -
380
+ Germanic O -
381
+ recruits Origin -
382
+ in O -
383
+ the O -
384
+ Roman O -
385
+ army O -
386
+ , O -
387
+ and O -
388
+ from O -
389
+ the O -
390
+ Roman O -
391
+ practice O -
392
+ of O -
393
+ enfeoffing O -
394
+ various O -
395
+ Germanic O -
396
+ warrior O -
397
+ groups O -
398
+ with O -
399
+ land O -
400
+ in O -
401
+ the O -
402
+ imperial O -
403
+ provinces O -
404
+ . O -
405
+
406
+ She O Member-Collection
407
+ soon O -
408
+ had O -
409
+ a O -
410
+ stable Collection -
411
+ of O -
412
+ her O -
413
+ own O -
414
+ rescued O -
415
+ hounds Member -
416
+ . O -
417
+
418
+ The O Cause-Effect
419
+ singer Cause -
420
+ , O -
421
+ who O -
422
+ performed O -
423
+ three O -
424
+ of O -
425
+ the O -
426
+ nominated O -
427
+ songs O -
428
+ , O -
429
+ also O -
430
+ caused O -
431
+ a O -
432
+ commotion Effect -
433
+ on O -
434
+ the O -
435
+ red O -
436
+ carpet O -
437
+ . O -
438
+
439
+ His O Other
440
+ intellectually O -
441
+ engaging O -
442
+ books O -
443
+ and O -
444
+ essays Orelation1 -
445
+ remain O -
446
+ pertinent O -
447
+ to O -
448
+ illuminating O -
449
+ contemporary O -
450
+ history Orelation2 -
451
+ . O -
452
+
453
+ Poor O Member-Collection
454
+ hygiene O -
455
+ controls O -
456
+ , O -
457
+ reports O -
458
+ of O -
459
+ a O -
460
+ brace Collection -
461
+ of O -
462
+ gamey O -
463
+ grouse Member -
464
+ and O -
465
+ what O -
466
+ looked O -
467
+ like O -
468
+ a O -
469
+ skinned O -
470
+ fox O -
471
+ all O -
472
+ amounted O -
473
+ to O -
474
+ a O -
475
+ pie O -
476
+ that O -
477
+ was O -
478
+ unfit O -
479
+ for O -
480
+ human O -
481
+ consumption O -
482
+ . O -
483
+
484
+ This O Other
485
+ sweet O -
486
+ dress Orelation1 -
487
+ is O -
488
+ made O -
489
+ with O -
490
+ a O -
491
+ blend Orelation2 -
492
+ of O -
493
+ cotton O -
494
+ and O -
495
+ silk O -
496
+ , O -
497
+ and O -
498
+ the O -
499
+ crochet O -
500
+ flower O -
501
+ necklace O -
502
+ is O -
503
+ the O -
504
+ perfect O -
505
+ accessory O -
506
+ . O -
507
+
508
+ Suicide Cause -
509
+ is O -
510
+ one O -
511
+ of O -
512
+ the O -
513
+ leading O -
514
+ causes O -
515
+ of O -
516
+ death Effect -
517
+ among O -
518
+ pre-adolescents O -
519
+ and O -
520
+ teens O -
521
+ , O -
522
+ and O -
523
+ victims O -
524
+ of O -
525
+ bullying O -
526
+ are O -
527
+ at O -
528
+ an O -
529
+ increased O -
530
+ risk O -
531
+ for O -
532
+ committing O -
533
+ suicide O -
534
+ . O -
535
+
536
+ This O Message-Topic
537
+ article Message -
538
+ gives O -
539
+ details O -
540
+ on O -
541
+ 2004 O -
542
+ in O -
543
+ music Topic -
544
+ in O -
545
+ the O -
546
+ United O -
547
+ Kingdom O -
548
+ , O -
549
+ including O -
550
+ the O -
551
+ official O -
552
+ charts O -
553
+ from O -
554
+ that O -
555
+ year O -
556
+ . O -
557
+
558
+ We O Message-Topic
559
+ have O -
560
+ therefore O -
561
+ taken O -
562
+ the O -
563
+ initiative O -
564
+ to O -
565
+ convene O -
566
+ the O -
567
+ first O -
568
+ international O -
569
+ open O -
570
+ meeting Message -
571
+ dedicated O -
572
+ solely O -
573
+ to O -
574
+ rural Topic -
575
+ history Topic -
576
+ . O -
577
+
578
+ The O Component-Whole
579
+ timer Component -
580
+ of O -
581
+ the O -
582
+ device Whole -
583
+ automatically O -
584
+ eliminates O -
585
+ wasted O -
586
+ " O -
587
+ standby O -
588
+ power O -
589
+ " O -
590
+ consumption O -
591
+ by O -
592
+ automatically O -
593
+ turn O -
594
+ off O -
595
+ electronics O -
596
+ plugged O -
597
+ into O -
598
+ the O -
599
+ " O -
600
+ auto O -
601
+ off O -
602
+ " O -
603
+ outlets O -
604
+ . O -
605
+
606
+ Bob O Message-Topic
607
+ Parks O -
608
+ made O -
609
+ a O -
610
+ similar O -
611
+ offer Topic -
612
+ in O -
613
+ a O -
614
+ phone Message -
615
+ call Message -
616
+ made O -
617
+ earlier O -
618
+ this O -
619
+ week O -
620
+ . O -
621
+
622
+ He O Cause-Effect
623
+ had O -
624
+ chest O -
625
+ pains O -
626
+ and O -
627
+ headaches Effect -
628
+ from O -
629
+ mold Cause -
630
+ in O -
631
+ the O -
632
+ bedrooms O -
633
+ . O -
634
+
635
+ The O Product-Producer
636
+ silver-haired O -
637
+ author O -
638
+ was O -
639
+ not O -
640
+ just O -
641
+ laying O -
642
+ India O -
643
+ 's O -
644
+ politician O -
645
+ saint O -
646
+ to O -
647
+ rest O -
648
+ but O -
649
+ healing O -
650
+ a O -
651
+ generations-old O -
652
+ rift O -
653
+ in O -
654
+ the O -
655
+ family O -
656
+ of O -
657
+ the O -
658
+ country Product -
659
+ ' O -
660
+ s O -
661
+ founding O -
662
+ father Producer -
663
+ . O -
664
+
665
+ It O Entity-Destination
666
+ describes O -
667
+ a O -
668
+ method O -
669
+ for O -
670
+ loading O -
671
+ a O -
672
+ horizontal O -
673
+ stack Entity -
674
+ of O -
675
+ containers O -
676
+ into O -
677
+ a O -
678
+ carton Destination -
679
+ . O -
680
+
681
+ The O Component-Whole
682
+ Foundation O -
683
+ decided O -
684
+ to O -
685
+ repurpose O -
686
+ the O -
687
+ building O -
688
+ in O -
689
+ order O -
690
+ to O -
691
+ reduce O -
692
+ wear O -
693
+ and O -
694
+ tear O -
695
+ on O -
696
+ the O -
697
+ plumbing Component -
698
+ in O -
699
+ the O -
700
+ manor Whole -
701
+ house Whole -
702
+ by O -
703
+ redirecting O -
704
+ visitors O -
705
+ during O -
706
+ restoration O -
707
+ projects O -
708
+ and O -
709
+ beyond O -
710
+ . O -
711
+
712
+ The O Entity-Origin
713
+ technology O -
714
+ is O -
715
+ available O -
716
+ to O -
717
+ produce O -
718
+ and O -
719
+ transmit O -
720
+ electricity Entity -
721
+ economically O -
722
+ from O -
723
+ OTEC O -
724
+ systems Origin -
725
+ . O -
726
+
727
+ The O Other
728
+ Medicare O -
729
+ buy-in O -
730
+ plan Orelation1 -
731
+ ran O -
732
+ into O -
733
+ Senate O -
734
+ resistance Orelation2 -
735
+ . O -
736
+
737
+ The O Component-Whole
738
+ provinces Whole -
739
+ are O -
740
+ divided O -
741
+ into O -
742
+ counties Component -
743
+ ( O -
744
+ shahrestan O -
745
+ ) O -
746
+ , O -
747
+ and O -
748
+ subdivided O -
749
+ into O -
750
+ districts O -
751
+ ( O -
752
+ bakhsh O -
753
+ ) O -
754
+ and O -
755
+ sub-districts O -
756
+ ( O -
757
+ dehestan O -
758
+ ) O -
759
+ . O -
760
+
761
+ Financial O Cause-Effect
762
+ stress Cause -
763
+ is O -
764
+ one O -
765
+ of O -
766
+ the O -
767
+ main O -
768
+ causes O -
769
+ of O -
770
+ divorce Effect -
771
+ . O -
772
+
773
+ Newspapers Agency -
774
+ swap O -
775
+ content O -
776
+ via O -
777
+ widgets O -
778
+ with O -
779
+ the O -
780
+ help O -
781
+ of O -
782
+ the O -
783
+ newsgator O -
784
+ service Instrument -
785
+ . O -
786
+
787
+ The O Cause-Effect
788
+ women Cause -
789
+ that O -
790
+ caused O -
791
+ the O -
792
+ accident Effect -
793
+ was O -
794
+ on O -
795
+ the O -
796
+ cell O -
797
+ phone O -
798
+ and O -
799
+ ran O -
800
+ thru O -
801
+ the O -
802
+ intersection O -
803
+ without O -
804
+ pausing O -
805
+ on O -
806
+ the O -
807
+ median O -
808
+ . O -
809
+
810
+ The O Content-Container
811
+ transmitter Content -
812
+ was O -
813
+ discovered O -
814
+ inside O -
815
+ a O -
816
+ bed O -
817
+ settee O -
818
+ suite Container -
819
+ on O -
820
+ which O -
821
+ he O -
822
+ had O -
823
+ been O -
824
+ sitting O -
825
+ . O -
826
+
827
+ The O Member-Collection
828
+ Kerala O -
829
+ backwaters O -
830
+ are O -
831
+ a O -
832
+ chain Collection -
833
+ of O -
834
+ brackish O -
835
+ lagoons Member -
836
+ and O -
837
+ lakes O -
838
+ lying O -
839
+ parallel O -
840
+ to O -
841
+ the O -
842
+ Arabian O -
843
+ Sea O -
844
+ coast O -
845
+ of O -
846
+ Kerala O -
847
+ state O -
848
+ in O -
849
+ southern O -
850
+ India O -
851
+ . O -
852
+
853
+ A O Entity-Origin
854
+ St. O -
855
+ Paul O -
856
+ College O -
857
+ student Entity -
858
+ was O -
859
+ released O -
860
+ from O -
861
+ jail Origin -
862
+ Wednesday O -
863
+ night O -
864
+ , O -
865
+ after O -
866
+ his O -
867
+ arrest O -
868
+ Tuesday O -
869
+ in O -
870
+ the O -
871
+ alleged O -
872
+ rape O -
873
+ of O -
874
+ another O -
875
+ student O -
876
+ on O -
877
+ campus O -
878
+ . O -
879
+
880
+ Calluses Effect -
881
+ are O -
882
+ caused O -
883
+ by O -
884
+ improperly O -
885
+ fitting O -
886
+ shoes O -
887
+ or O -
888
+ by O -
889
+ a O -
890
+ skin Cause -
891
+ abnormality Cause -
892
+ . O -
893
+
894
+ Adults Agency -
895
+ use O -
896
+ drugs Instrument -
897
+ for O -
898
+ this O -
899
+ purpose O -
900
+ . O -
901
+
902
+ The O Instrument-Agency
903
+ councilor Agency -
904
+ proposed O -
905
+ assessing O -
906
+ infinitival O -
907
+ complements O -
908
+ through O -
909
+ elicitation Instrument -
910
+ . O -
911
+
912
+ As O Cause-Effect
913
+ in O -
914
+ the O -
915
+ popular O -
916
+ movie O -
917
+ " O -
918
+ Deep O -
919
+ Impact O -
920
+ " O -
921
+ , O -
922
+ the O -
923
+ action O -
924
+ of O -
925
+ the O -
926
+ Perseid O -
927
+ meteor Effect -
928
+ shower Effect -
929
+ is O -
930
+ caused O -
931
+ by O -
932
+ a O -
933
+ comet Cause -
934
+ , O -
935
+ in O -
936
+ this O -
937
+ case O -
938
+ periodic O -
939
+ comet O -
940
+ Swift-Tuttle O -
941
+ . O -
942
+
943
+ The O Other
944
+ following O -
945
+ information O -
946
+ appeared O -
947
+ in O -
948
+ the O -
949
+ notes Orelation1 -
950
+ to O -
951
+ consolidated O -
952
+ financial O -
953
+ statements Orelation2 -
954
+ of O -
955
+ some O -
956
+ corporate O -
957
+ annual O -
958
+ reports O -
959
+ . O -
960
+
961
+ HipHop O Other
962
+ appropriates O -
963
+ the O -
964
+ symbols O -
965
+ of O -
966
+ a O -
967
+ consumer O -
968
+ society O -
969
+ : O -
970
+ oversized O -
971
+ diamond Orelation1 -
972
+ colliers Orelation2 -
973
+ are O -
974
+ worn O -
975
+ . O -
976
+
977
+ The O Cause-Effect
978
+ radiation Effect -
979
+ from O -
980
+ the O -
981
+ atomic O -
982
+ bomb Cause -
983
+ explosion Cause -
984
+ is O -
985
+ a O -
986
+ typical O -
987
+ acute O -
988
+ radiation O -
989
+ . O -
990
+
991
+ The O Component-Whole
992
+ ride-on O -
993
+ boat Whole -
994
+ tiller Component -
995
+ was O -
996
+ developed O -
997
+ by O -
998
+ engineers O -
999
+ Arnold O -
1000
+ S. O -
1001
+ Juliano O -
1002
+ and O -
1003
+ Dr. O -
1004
+ Eulito O -
1005
+ U. O -
1006
+ Bautista O -
1007
+ . O -
1008
+
1009
+ A O Cause-Effect
1010
+ neoplastic O -
1011
+ recurrence Effect -
1012
+ arose O -
1013
+ from O -
1014
+ an O -
1015
+ extensive O -
1016
+ radiation Cause -
1017
+ induced O -
1018
+ ulceration O -
1019
+ . O -
1020
+
1021
+ He O Cause-Effect
1022
+ has O -
1023
+ a O -
1024
+ tattoo O -
1025
+ on O -
1026
+ his O -
1027
+ right O -
1028
+ arm O -
1029
+ and O -
1030
+ scars Effect -
1031
+ from O -
1032
+ stitches Cause -
1033
+ on O -
1034
+ his O -
1035
+ right O -
1036
+ elbow O -
1037
+ . O -
models/CCC/best-model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a238cabb70e6382b209c33ab2c24645f464f0f077dc4721794027a4f33a8fd17
3
+ size 2239869805
models/RC/SemEval/best_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:219cc30a5fc9e22b2a3a59ad0f114f49a7f041c45b1d063dd6acfe95b107677d
3
+ size 129032885
models/RC/SemEval/rel2id.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Product-Producer": 0, "Cause-Effect": 1, "Content-Container": 2, "Component-Whole": 3, "Other": 4, "Entity-Destination": 5, "Instrument-Agency": 6, "Entity-Origin": 7, "Message-Topic": 8, "Member-Collection": 9}
models/RC/new/best_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:590e08f4b59b0e916e307f40eb97831ed8a1b244422fb0e2dbdf0a5ef2952e55
3
+ size 129032363
models/RC/new/rel2id.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Product-Producer": 0, "Cause-Effect": 1, "Content-Container": 2, "Component-Whole": 3, "Other": 4, "Entity-Destination": 5, "Instrument-Agency": 6, "Entity-Origin": 7, "Message-Topic": 8, "Member-Collection": 9}
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ pandas==1.5.1
2
+ flair==0.11.3
3
+ nltk==3.7
4
+ deep_translator==1.9.1
5
+ gradio == 3.9.1
setup.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Tue Dec 13 18:06:56 2022
4
+
5
+ @author: gita
6
+ """
7
+
8
+ from distutils.core import setup
9
+ import py2exe
10
+
11
# Build configuration for py2exe: freeze the GUI launcher script as a
# console program, using bundle_files level 1.
setup(
    console=[
        {"script": "execute_GUI.py"},
    ],
    options={
        "py2exe": {
            "bundle_files": 1,
        },
    },
)
src/graph/GUI.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Fri Nov 11 16:01:08 2022
4
+
5
+ @author: Santiago Moreno
6
+ """
7
+
8
+ import os
9
+ import gradio as gr
10
+ import sys
11
+ import json
12
+
13
+
14
# Directory that contains this module. Every model path below is built from it.
default_path = os.path.dirname(os.path.abspath(__file__))
#default_path = default_path.replace('\\', '/')

# Work from the module directory so that relative paths such as
# '../../models' (used by the GUI callbacks) are resolved against it, and make
# the sibling scripts directory importable before the project imports below.
os.chdir(default_path)
sys.path.insert(0, default_path+'/../scripts')

from src.scripts.functionsner import (
    use_model,
    tag_sentence,
    json_to_txt,
    training_model,
    characterize_data,
    upsampling_data,
    usage_cuda,
    copy_data,
)
from src.scripts.functionsrc import (
    use_model_rc,
    training_model_rc,
    usage_cuda_rc,
)

# Entries of the models directory, minus the 'RC' entry, are offered as NER
# models; the entries inside 'RC' are offered as RC models.
models = os.listdir(default_path+'/../../models')
models.remove('RC')
models_rc = os.listdir(default_path+'/../../models/RC')
26
+
27
+ #-------------------------------------------Functions-----------------------------------------------
28
+
29
+ #--------------------------------------NER-----------------------------------
30
def Trainer(fast, model_name, standard, input_dir, Upsampling, Cuda):
    """Train a NER model, yielding status messages for the GUI.

    This is a generator: each yielded string is a status or error message.

    Args:
        fast: When truthy the model is trained for 1 epoch, otherwise 20.
        model_name: Name handed to ``training_model``; also used in the
            final status message.
        standard: When truthy ``input_dir`` is handed to ``copy_data``;
            otherwise it is handed to ``json_to_txt``.
        input_dir: Path of the directory with the training documents.
        Upsampling: When truthy, every entity whose value in the dict
            returned by ``characterize_data`` is below 200 is passed to
            ``upsampling_data`` before training.
        Cuda: Truthy to request CUDA through ``usage_cuda``.

    Yields:
        str: Progress or error messages.
    """
    epochs = 1 if fast else 20
    cuda_info = usage_cuda(bool(Cuda))

    if standard:
        copy_data(input_dir)
    else:
        Error = json_to_txt(input_dir)
        if type(Error) == int:
            yield 'Error processing the input documents, code error {}'.format(Error)
            # Stop here: without this return the generator went on to train
            # and replaced the error message with later status messages.
            return

    if Upsampling:
        yield cuda_info+'\n'+'-'*20+'Upsampling'+'-'*20
        entities_dict = characterize_data()
        entities = list(entities_dict.keys())
        entities_to_upsample = [
            name for name, value in entities_dict.items() if value < 200
        ]
        upsampling_data(entities_to_upsample, 0.8, entities)
        yield '-'*20+'Training'+'-'*20
    else:
        yield cuda_info+'\n'+'-'*20+'Training'+'-'*20

    Error = training_model(model_name, epochs)
    if type(Error) == int:
        yield 'Error training the model, code error {}'.format(Error)
    else:
        yield 'Training complete, model {} could be found at models/{}'.format(model_name,model_name)
60
+
61
+
62
def Tagger_sentence(Model, Sentence, Cuda):
    """Tag a single sentence with a NER model, yielding GUI updates.

    Yields first a status banner that includes the text returned by
    ``usage_cuda``, then either the ``'Highligth'`` entry of the result of
    ``tag_sentence`` or an error message when that result is an int.

    Args:
        Model: Model identifier passed to ``tag_sentence``.
        Sentence: Text to tag.
        Cuda: Truthy to request CUDA through ``usage_cuda``.
    """
    device_report = usage_cuda(True) if Cuda else usage_cuda(False)
    yield device_report+'\n'+'-'*20+'Tagging'+'-'*20

    tagged = tag_sentence(Sentence, Model)
    if type(tagged) != int:
        yield tagged['Highligth']
        return
    yield "Error {}, see documentation".format(tagged)
71
+
72
def Tagger_json(Model, Input_file, Output_file, Cuda):
    """Tag a document with a NER model, yielding GUI updates.

    Every yield is a 3-tuple ``(highlight_or_message, json_payload,
    Output_file)``.

    Args:
        Model: Model identifier passed to ``use_model``.
        Input_file: Object whose ``.name`` attribute is the input path.
        Output_file: Path of the JSON file to write.
        Cuda: Truthy to request CUDA through ``usage_cuda``.
    """
    device_report = usage_cuda(True) if Cuda else usage_cuda(False)

    # Write a placeholder document first so the output path exists before
    # the first yield hands it to the GUI.
    placeholder = {'error':'error'}
    with open(Output_file, "w", encoding='utf-8') as handle:
        json.dump(placeholder, handle)

    yield device_report+'\n'+'-'*20+'Tagging'+'-'*20, {}, Output_file

    tagged = use_model(Model, Input_file.name, Output_file)
    if type(tagged) == int:
        # An int result is treated as an error code.
        yield "Error {}, see documentation".format(tagged), {}, Output_file
        return

    highlighted = { "text" : tagged['text'], 'entities': tagged['entities']}
    yield highlighted, tagged, Output_file
87
+
88
+
89
+ #--------------------RC-------------------------------
90
def Trainer_RC(fast, model_name, input_file, rel2id_file, Cuda):
    """Train a relation-classification model, yielding status messages.

    Args:
        fast: When truthy the model is trained for 1 epoch, otherwise 200.
        model_name: Name handed to ``training_model_rc``; also used in the
            final status message.
        input_file: Object whose ``.name`` attribute is the training file path.
        rel2id_file: Object whose ``.name`` attribute is the rel2id file path.
        Cuda: Truthy to request CUDA through ``usage_cuda_rc``.

    Yields:
        str: Progress or error messages.
    """
    epochs = 1 if fast else 200
    device_report = usage_cuda_rc(True) if Cuda else usage_cuda_rc(False)

    yield device_report+'\n'+'-'*20+'Training'+'-'*20

    outcome = training_model_rc(model_name, input_file.name, rel2id_file.name, epochs)
    if type(outcome) != int:
        yield 'Training complete, model {} could be found at models/{}'.format(model_name,model_name)
        return
    # An int result is treated as an error code.
    yield 'Error training the model, code error {}'.format(outcome)
106
+
107
+
108
def Tagger_document_RC(Model, Input_file, Output_file, Cuda):
    """Classify relations in a document, yielding GUI updates.

    Every yield is a pair ``(json_payload, Output_file)``.

    Args:
        Model: Model identifier passed to ``use_model_rc``.
        Input_file: Object whose ``.name`` attribute is the input path.
        Output_file: Path of the JSON file to write.
        Cuda: Truthy to request CUDA through ``usage_cuda_rc``.
    """
    device_report = usage_cuda_rc(True) if Cuda else usage_cuda_rc(False)

    # Write a placeholder document first so the output path exists before
    # the first yield hands it to the GUI.
    placeholder = {'error':'error'}
    with open(Output_file, "w", encoding='utf-8') as handle:
        json.dump(placeholder, handle)

    yield {'cuda':device_report}, Output_file

    outcome = use_model_rc(Model, Input_file.name, Output_file)
    if type(outcome) == int:
        # An int result is treated as an error code; show an empty payload.
        yield {}, Output_file
        return
    yield outcome, Output_file
123
+
124
+
125
+ #---------------------------------GUI-------------------------------------
126
def execute_GUI():
    """Build the Gradio interface for NER and RC and launch the server.

    The interface has two top-level tabs. "NER" offers sentence and
    document tagging plus model training; "RC" offers document tagging and
    model training. The app is queued and launched on port 8080.

    Side effects:
        Rebinds the module-level ``models`` list while building the
        "Document" tab, and starts the Gradio server.

    NOTE(review): confirm that each output component is meant to sit in the
    row next to its input column.
    """
    global models

    def _ner_model_names():
        # Entries of the models directory minus the 'RC' entry. list.remove()
        # returns None, so the filtered list is built explicitly.
        return [name for name in os.listdir(default_path+'/../../models') if name != 'RC']

    with gr.Blocks(title='NER', css="#title {font-size: 150% } #sub {font-size: 120% } ") as demo:

        gr.Markdown("Named Entity Recognition(NER) and Relation Classification (RC) by GITA and Pratec Group S.A.S.",elem_id="title")
        gr.Markdown("Software developed by Santiago Moreno, Daniel Escobar, and Rafael Orozco",elem_id="sub")
        gr.Markdown("Named Entity Recognition(NER) and Relation Classification (RC) System.")

        with gr.Tab("NER"):
            gr.Markdown("Use Tagger to apply NER from a pretrained model in a sentence or a given document in INPUT (.JSON) format.")
            gr.Markdown("Use Trainer to train a new NER model from a directory of documents in PRATECH (.JSON) format.")
            with gr.Tab("Tagger"):
                with gr.Tab("Sentence"):
                    with gr.Row():
                        with gr.Column():
                            b = gr.Radio(list(models), label='Model')
                            inputs = [
                                b,
                                gr.Textbox(placeholder="Enter sentence here...", label='Sentence'),
                                gr.Radio([True,False], label='CUDA', value=False),
                            ]
                            tagger_sen = gr.Button("Tag")
                        output = gr.HighlightedText()

                    tagger_sen.click(Tagger_sentence, inputs=inputs, outputs=output)
                    # Refresh the list of available models whenever the
                    # selection changes.
                    b.change(fn=lambda value: gr.update(choices=_ner_model_names()), inputs=b, outputs=b)
                    gr.Examples(
                        examples=[
                            ['CCC',"Camara de comercio de medellín. El ciudadano JAIME JARAMILLO VELEZ identificado con C.C. 12546987 ingresó al plantel el día 1/01/2022"],
                            ['CCC',"Razón Social GASEOSAS GLACIAR S.A.S, ACTIVIDAD PRINCIPAL fabricación y distribución de bebidas endulzadas"]
                        ],
                        inputs=inputs
                    )

                with gr.Tab("Document"):
                    with gr.Row():
                        with gr.Column():
                            c = gr.Radio(list(models), label='Model')
                            inputs = [
                                c,
                                gr.File(label='Input data file'),
                                gr.Textbox(placeholder="Enter path here...", label='Output data file path'), #value='../../data/Tagged/document_tagged.json'),
                                gr.Radio([True,False], label='CUDA', value=False),
                            ]
                            tagger_json = gr.Button("Tag")
                        output = [
                            gr.HighlightedText(),
                            gr.JSON(),
                            gr.File(),
                        ]

                    # Re-read the models directory into the module-level list.
                    models = _ner_model_names()

                    tagger_json.click(Tagger_json, inputs=inputs, outputs=output)
                    c.change(fn=lambda value: gr.update(choices=_ner_model_names()), inputs=c, outputs=c)

            with gr.Tab("Trainer"):
                with gr.Row():
                    with gr.Column():
                        train_input = inputs = [
                            gr.Radio([True,False], label='Fast training', value=True),
                            gr.Textbox(placeholder="Enter model name here...", label='New model name'),
                            gr.Radio([True,False], label='Standard input', value=False),
                            gr.Textbox(placeholder="Enter path here...", label='Input data directory path'),
                            gr.Radio([True,False], label='Upsampling', value=False),
                            gr.Radio([True,False], label='CUDA', value=False),
                        ]
                        trainer = gr.Button("Train")
                    train_output = gr.TextArea(placeholder="Output information", label='Output')

                # Bind the NER "Train" button; without this the button had
                # no handler and Trainer was never called.
                trainer.click(Trainer, inputs=train_input, outputs=train_output)

        with gr.Tab("RC"):
            gr.Markdown("Use Tagger to apply RC from a pretrained model in document in (.TXT) CONLL04 format.")
            gr.Markdown("Use Trainer to train a new RC model from a file (.TXT) CONLL04 format and the rel2id file (.JSON).")
            with gr.Tab("Tagger Document"):
                with gr.Row():
                    with gr.Column():
                        c = gr.Radio(list(models_rc), label='Model')
                        inputs = [
                            c,
                            gr.File(label='Input data file'),
                            gr.Textbox(placeholder="Enter path here...", label='Output data file path (.JSON)'), #value='../../data/Tagged/document_tagged.json'),
                            gr.Radio([True,False], label='CUDA', value=False),
                        ]
                        tagger_json = gr.Button("Tag")
                    output = [
                        gr.JSON(),
                        gr.File(),
                    ]

                tagger_json.click(Tagger_document_RC, inputs=inputs, outputs=output)
                c.change(fn=lambda value: gr.update(choices=list(os.listdir('../../models/RC'))), inputs=c, outputs=c)

            with gr.Tab("Trainer"):
                with gr.Row():
                    with gr.Column():
                        train_input = inputs = [
                            gr.Radio([True,False], label='Fast training', value=True),
                            gr.Textbox(placeholder="Enter model name here...", label='New model name'),
                            gr.File(label='Input train file (.TXT)'),
                            gr.File(label='Input rel2id file (.JSON)'),
                            gr.Radio([True,False], label='CUDA', value=False),
                        ]
                        trainer = gr.Button("Train")
                    train_output = gr.TextArea(placeholder="Output information", label='Output')

                trainer.click(Trainer_RC, inputs=train_input, outputs=train_output)

    demo.queue()
    demo.launch(server_name="0.0.0.0", server_port=8080,inbrowser=True, share = True)
245
+
246
+
src/graph/__pycache__/GUI.cpython-311.pyc ADDED
Binary file (17.9 kB). View file
 
src/graph/__pycache__/GUI.cpython-39.pyc ADDED
Binary file (7.57 kB). View file
 
src/graph/out ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sentences": {"tokens": [["The", "system", "as", "described", "above", "has", "its", "greatest", "application", "in", "an", "arrayed", "configuration", "of", "antenna", "elements", "."], ["The", "child", "was", "carefully", "wrapped", "and", "bound", "into", "the", "cradle", "by", "means", "of", "a", "cord", "."], ["The", "author", "of", "a", "keygen", "uses", "a", "disassembler", "to", "look", "at", "the", "raw", "assembly", "code", "."], ["A", "misty", "ridge", "uprises", "from", "the", "surge", "."], ["The", "student", "association", "is", "the", "voice", "of", "the", "undergraduate", "student", "population", "of", "the", "State", "University", "of", "New", "York", "at", "Buffalo", "."], ["This", "is", "the", "sprawling", "complex", "that", "is", "Peru", "'s", "largest", "producer", "of", "silver", "."], ["The", "current", "view", "is", "that", "the", "chronic", "inflammation", "in", "the", "distal", "part", "of", "the", "stomach", "caused", "by", "Helicobacter", "pylori", "infection", "results", "in", "an", "increased", "acid", "production", "from", "the", "non-infected", "upper", "corpus", "region", "of", "the", "stomach", "."], ["People", "have", "been", "moving", "back", "into", "downtown", "."], ["The", "lawsonite", "was", "contained", "in", "a", "platinum", "crucible", "and", "the", "counter-weight", "was", "a", "plastic", "crucible", "with", "metal", "pieces", "."], ["The", "solute", "was", "placed", "inside", "a", "beaker", "and", "5", "mL", "of", "the", "solvent", "was", "pipetted", "into", "a", "25", "mL", "glass", "flask", "for", "each", "trial", "."], ["The", "fifty", "essays", "collected", "in", "this", "volume", "testify", "to", "most", "of", "the", "prominent", "themes", "from", "Professor", "Quispel", "'s", "scholarly", "career", "."], ["Their", "composer", "has", "sunk", "into", "oblivion", "."], ["The", "Pulitzer", "Committee", "issues", "an", "official", "citation", "explaining", "the", "reasons", "for", "the", "award", "."], ["The", 
"burst", "has", "been", "caused", "by", "water", "hammer", "pressure", "."], ["Even", "commercial", "networks", "have", "moved", "into", "high-definition", "broadcast", "."], ["It", "was", "a", "friendly", "call", "to", "remind", "them", "about", "the", "bill", "and", "make", "sure", "they", "have", "a", "copy", "of", "the", "invoice", "."], ["Texas-born", "virtuoso", "finds", "harmony", ",", "sophistication", "in", "Appalachian", "instrument", "."], ["The", "factory", "'", "s", "products", "have", "included", "flower", "pots", ",", "Finnish", "rooster-whistles", ",", "pans", ",", "trays", ",", "tea", "pots", ",", "ash", "trays", "and", "air", "moisturisers", "."], ["The", "girl", "showed", "a", "photo", "of", "apple", "tree", "blossom", "on", "a", "fruit", "tree", "in", "the", "Central", "Valley", "."], ["They", "tried", "an", "assault", "of", "their", "own", "an", "hour", "later", ",", "with", "two", "columns", "of", "sixteen", "tanks", "backed", "by", "a", "battalion", "of", "Panzer", "grenadiers", "."], ["Their", "knowledge", "of", "the", "power", "and", "rank", "symbols", "of", "the", "Continental", "empires", "was", "gained", "from", "the", "numerous", "Germanic", "recruits", "in", "the", "Roman", "army", ",", "and", "from", "the", "Roman", "practice", "of", "enfeoffing", "various", "Germanic", "warrior", "groups", "with", "land", "in", "the", "imperial", "provinces", "."], ["She", "soon", "had", "a", "stable", "of", "her", "own", "rescued", "hounds", "."], ["The", "singer", ",", "who", "performed", "three", "of", "the", "nominated", "songs", ",", "also", "caused", "a", "commotion", "on", "the", "red", "carpet", "."], ["His", "intellectually", "engaging", "books", "and", "essays", "remain", "pertinent", "to", "illuminating", "contemporary", "history", "."], ["Poor", "hygiene", "controls", ",", "reports", "of", "a", "brace", "of", "gamey", "grouse", "and", "what", "looked", "like", "a", "skinned", "fox", "all", "amounted", "to", "a", "pie", "that", "was", 
"unfit", "for", "human", "consumption", "."], ["This", "sweet", "dress", "is", "made", "with", "a", "blend", "of", "cotton", "and", "silk", ",", "and", "the", "crochet", "flower", "necklace", "is", "the", "perfect", "accessory", "."], ["Suicide", "is", "one", "of", "the", "leading", "causes", "of", "death", "among", "pre-adolescents", "and", "teens", ",", "and", "victims", "of", "bullying", "are", "at", "an", "increased", "risk", "for", "committing", "suicide", "."], ["This", "article", "gives", "details", "on", "2004", "in", "music", "in", "the", "United", "Kingdom", ",", "including", "the", "official", "charts", "from", "that", "year", "."], ["We", "have", "therefore", "taken", "the", "initiative", "to", "convene", "the", "first", "international", "open", "meeting", "dedicated", "solely", "to", "rural", "history", "."], ["The", "timer", "of", "the", "device", "automatically", "eliminates", "wasted", "\"", "standby", "power", "\"", "consumption", "by", "automatically", "turn", "off", "electronics", "plugged", "into", "the", "\"", "auto", "off", "\"", "outlets", "."], ["Bob", "Parks", "made", "a", "similar", "offer", "in", "a", "phone", "call", "made", "earlier", "this", "week", "."], ["He", "had", "chest", "pains", "and", "headaches", "from", "mold", "in", "the", "bedrooms", "."], ["The", "silver-haired", "author", "was", "not", "just", "laying", "India", "'s", "politician", "saint", "to", "rest", "but", "healing", "a", "generations-old", "rift", "in", "the", "family", "of", "the", "country", "'", "s", "founding", "father", "."], ["It", "describes", "a", "method", "for", "loading", "a", "horizontal", "stack", "of", "containers", "into", "a", "carton", "."], ["The", "Foundation", "decided", "to", "repurpose", "the", "building", "in", "order", "to", "reduce", "wear", "and", "tear", "on", "the", "plumbing", "in", "the", "manor", "house", "by", "redirecting", "visitors", "during", "restoration", "projects", "and", "beyond", "."], ["The", "technology", "is", 
"available", "to", "produce", "and", "transmit", "electricity", "economically", "from", "OTEC", "systems", "."], ["The", "Medicare", "buy-in", "plan", "ran", "into", "Senate", "resistance", "."], ["The", "provinces", "are", "divided", "into", "counties", "(", "shahrestan", ")", ",", "and", "subdivided", "into", "districts", "(", "bakhsh", ")", "and", "sub-districts", "(", "dehestan", ")", "."], ["Financial", "stress", "is", "one", "of", "the", "main", "causes", "of", "divorce", "."], ["Newspapers", "swap", "content", "via", "widgets", "with", "the", "help", "of", "the", "newsgator", "service", "."], ["The", "women", "that", "caused", "the", "accident", "was", "on", "the", "cell", "phone", "and", "ran", "thru", "the", "intersection", "without", "pausing", "on", "the", "median", "."], ["The", "transmitter", "was", "discovered", "inside", "a", "bed", "settee", "suite", "on", "which", "he", "had", "been", "sitting", "."], ["The", "Kerala", "backwaters", "are", "a", "chain", "of", "brackish", "lagoons", "and", "lakes", "lying", "parallel", "to", "the", "Arabian", "Sea", "coast", "of", "Kerala", "state", "in", "southern", "India", "."], ["A", "St.", "Paul", "College", "student", "was", "released", "from", "jail", "Wednesday", "night", ",", "after", "his", "arrest", "Tuesday", "in", "the", "alleged", "rape", "of", "another", "student", "on", "campus", "."], ["Calluses", "are", "caused", "by", "improperly", "fitting", "shoes", "or", "by", "a", "skin", "abnormality", "."], ["Adults", "use", "drugs", "for", "this", "purpose", "."], ["The", "councilor", "proposed", "assessing", "infinitival", "complements", "through", "elicitation", "."], ["As", "in", "the", "popular", "movie", "\"", "Deep", "Impact", "\"", ",", "the", "action", "of", "the", "Perseid", "meteor", "shower", "is", "caused", "by", "a", "comet", ",", "in", "this", "case", "periodic", "comet", "Swift-Tuttle", "."], ["The", "following", "information", "appeared", "in", "the", "notes", "to", "consolidated", 
"financial", "statements", "of", "some", "corporate", "annual", "reports", "."], ["HipHop", "appropriates", "the", "symbols", "of", "a", "consumer", "society", ":", "oversized", "diamond", "colliers", "are", "worn", "."], ["The", "radiation", "from", "the", "atomic", "bomb", "explosion", "is", "a", "typical", "acute", "radiation", "."], ["The", "ride-on", "boat", "tiller", "was", "developed", "by", "engineers", "Arnold", "S.", "Juliano", "and", "Dr.", "Eulito", "U.", "Bautista", "."], ["A", "neoplastic", "recurrence", "arose", "from", "an", "extensive", "radiation", "induced", "ulceration", "."]], "entities": [["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Whole", "O", "O", "Component", "O"], ["O", "Orelation1", "O", "O", "O", "O", "O", "O", "O", "Orelation2", "O", "O", "O", "O", "O", "O"], ["O", "Agency", "O", "O", "O", "O", "O", "Instrument", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "Orelation1", "O", "O", "O", "Orelation2", "O"], ["O", "Member", "Collection", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "Orelation1", "O", "O", "O", "O", "O", "Orelation2", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "Effect", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Cause", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["Entity", "O", "O", "O", "O", "O", "Destination", "O"], ["O", "Content", "O", "O", "O", "O", "Container", "Container", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Entity", "O", "O", "O", "O", "O", "O", "O", "Destination", "O", "O", "O", "O"], ["O", "O", "Member", "O", "O", "O", "Collection", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "Orelation1", "O", "O", "O", "Orelation2", "O"], ["O", "O", "O", "O", "O", "O", "Message", "O", "O", "Topic", "O", "O", "O", "O"], ["O", "Effect", "O", "O", "O", "O", "O", "O", 
"Cause", "O"], ["O", "O", "Agency", "O", "O", "O", "Instrument", "Instrument", "O"], ["O", "O", "O", "O", "Message", "O", "O", "O", "O", "O", "Topic", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "Agency", "O", "O", "O", "O", "O", "O", "Instrument", "O"], ["O", "Producer", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Product", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "Whole", "Component", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Collection", "O", "O", "Member", "O"], ["O", "Entity", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Origin", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "Collection", "O", "O", "O", "O", "Member", "O"], ["O", "Cause", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Effect", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "Orelation1", "O", "O", "O", "O", "O", "Orelation2", "O"], ["O", "O", "O", "O", "O", "O", "O", "Collection", "O", "O", "Member", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "Orelation1", "O", "O", "O", "O", "Orelation2", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["Cause", "O", "O", "O", "O", "O", "O", "O", "Effect", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "Message", "O", "O", "O", "O", "O", "Topic", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Message", "O", "O", "O", "Topic", "Topic", "O"], ["O", "Component", "O", "O", "Whole", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", 
"O", "Topic", "O", "O", "Message", "Message", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "Effect", "O", "Cause", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Product", "O", "O", "O", "Producer", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "Entity", "O", "O", "O", "O", "Destination", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Component", "O", "O", "Whole", "Whole", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "Entity", "O", "O", "O", "Origin", "O"], ["O", "O", "O", "Orelation1", "O", "O", "O", "Orelation2", "O"], ["O", "Whole", "O", "O", "O", "Component", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "Cause", "O", "O", "O", "O", "O", "O", "O", "Effect", "O"], ["Agency", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Instrument", "O"], ["O", "Cause", "O", "O", "O", "Effect", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "Content", "O", "O", "O", "O", "O", "O", "Container", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "Collection", "O", "O", "Member", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "Entity", "O", "O", "O", "Origin", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["Effect", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Cause", "Cause", "O"], ["Agency", "O", "Instrument", "O", "O", "O", "O"], ["O", "Agency", "O", "O", "O", "O", "O", "Instrument", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Effect", "Effect", "O", "O", "O", "O", "Cause", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "Orelation1", "O", "O", "O", "Orelation2", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", 
"Orelation1", "Orelation2", "O", "O", "O"], ["O", "Effect", "O", "O", "O", "Cause", "Cause", "O", "O", "O", "O", "O", "O"], ["O", "O", "Whole", "Component", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "Effect", "O", "O", "O", "O", "Cause", "O", "O", "O"]]}, "relations": ["Cause-Effect", "Message-Topic", "Other", "Cause-Effect", "Other", "Cause-Effect", "Other", "Entity-Origin", "Cause-Effect", "Other", "Cause-Effect", "Cause-Effect", "Other", "Cause-Effect", "Content-Container", "Other", "Other", "Cause-Effect", "Cause-Effect", "Other", "Cause-Effect", "Message-Topic", "Cause-Effect", "Message-Topic", "Cause-Effect", "Other", "Cause-Effect", "Other", "Other", "Cause-Effect", "Other", "Cause-Effect", "Other", "Other", "Message-Topic", "Other", "Other", "Other", "Other", "Other", "Cause-Effect", "Cause-Effect", "Other", "Other", "Message-Topic", "Cause-Effect", "Other", "Other", "Cause-Effect", "Other", "Message-Topic", "Cause-Effect", "Other"]}
src/graph/out.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sentences": {"tokens": [["The", "system", "as", "described", "above", "has", "its", "greatest", "application", "in", "an", "arrayed", "configuration", "of", "antenna", "elements", "."], ["The", "child", "was", "carefully", "wrapped", "and", "bound", "into", "the", "cradle", "by", "means", "of", "a", "cord", "."], ["The", "author", "of", "a", "keygen", "uses", "a", "disassembler", "to", "look", "at", "the", "raw", "assembly", "code", "."], ["A", "misty", "ridge", "uprises", "from", "the", "surge", "."], ["The", "student", "association", "is", "the", "voice", "of", "the", "undergraduate", "student", "population", "of", "the", "State", "University", "of", "New", "York", "at", "Buffalo", "."], ["This", "is", "the", "sprawling", "complex", "that", "is", "Peru", "'s", "largest", "producer", "of", "silver", "."], ["The", "current", "view", "is", "that", "the", "chronic", "inflammation", "in", "the", "distal", "part", "of", "the", "stomach", "caused", "by", "Helicobacter", "pylori", "infection", "results", "in", "an", "increased", "acid", "production", "from", "the", "non-infected", "upper", "corpus", "region", "of", "the", "stomach", "."], ["People", "have", "been", "moving", "back", "into", "downtown", "."], ["The", "lawsonite", "was", "contained", "in", "a", "platinum", "crucible", "and", "the", "counter-weight", "was", "a", "plastic", "crucible", "with", "metal", "pieces", "."], ["The", "solute", "was", "placed", "inside", "a", "beaker", "and", "5", "mL", "of", "the", "solvent", "was", "pipetted", "into", "a", "25", "mL", "glass", "flask", "for", "each", "trial", "."], ["The", "fifty", "essays", "collected", "in", "this", "volume", "testify", "to", "most", "of", "the", "prominent", "themes", "from", "Professor", "Quispel", "'s", "scholarly", "career", "."], ["Their", "composer", "has", "sunk", "into", "oblivion", "."], ["The", "Pulitzer", "Committee", "issues", "an", "official", "citation", "explaining", "the", "reasons", "for", "the", "award", "."], ["The", 
"burst", "has", "been", "caused", "by", "water", "hammer", "pressure", "."], ["Even", "commercial", "networks", "have", "moved", "into", "high-definition", "broadcast", "."], ["It", "was", "a", "friendly", "call", "to", "remind", "them", "about", "the", "bill", "and", "make", "sure", "they", "have", "a", "copy", "of", "the", "invoice", "."], ["Texas-born", "virtuoso", "finds", "harmony", ",", "sophistication", "in", "Appalachian", "instrument", "."], ["The", "factory", "'", "s", "products", "have", "included", "flower", "pots", ",", "Finnish", "rooster-whistles", ",", "pans", ",", "trays", ",", "tea", "pots", ",", "ash", "trays", "and", "air", "moisturisers", "."], ["The", "girl", "showed", "a", "photo", "of", "apple", "tree", "blossom", "on", "a", "fruit", "tree", "in", "the", "Central", "Valley", "."], ["They", "tried", "an", "assault", "of", "their", "own", "an", "hour", "later", ",", "with", "two", "columns", "of", "sixteen", "tanks", "backed", "by", "a", "battalion", "of", "Panzer", "grenadiers", "."], ["Their", "knowledge", "of", "the", "power", "and", "rank", "symbols", "of", "the", "Continental", "empires", "was", "gained", "from", "the", "numerous", "Germanic", "recruits", "in", "the", "Roman", "army", ",", "and", "from", "the", "Roman", "practice", "of", "enfeoffing", "various", "Germanic", "warrior", "groups", "with", "land", "in", "the", "imperial", "provinces", "."], ["She", "soon", "had", "a", "stable", "of", "her", "own", "rescued", "hounds", "."], ["The", "singer", ",", "who", "performed", "three", "of", "the", "nominated", "songs", ",", "also", "caused", "a", "commotion", "on", "the", "red", "carpet", "."], ["His", "intellectually", "engaging", "books", "and", "essays", "remain", "pertinent", "to", "illuminating", "contemporary", "history", "."], ["Poor", "hygiene", "controls", ",", "reports", "of", "a", "brace", "of", "gamey", "grouse", "and", "what", "looked", "like", "a", "skinned", "fox", "all", "amounted", "to", "a", "pie", "that", "was", 
"unfit", "for", "human", "consumption", "."], ["This", "sweet", "dress", "is", "made", "with", "a", "blend", "of", "cotton", "and", "silk", ",", "and", "the", "crochet", "flower", "necklace", "is", "the", "perfect", "accessory", "."], ["Suicide", "is", "one", "of", "the", "leading", "causes", "of", "death", "among", "pre-adolescents", "and", "teens", ",", "and", "victims", "of", "bullying", "are", "at", "an", "increased", "risk", "for", "committing", "suicide", "."], ["This", "article", "gives", "details", "on", "2004", "in", "music", "in", "the", "United", "Kingdom", ",", "including", "the", "official", "charts", "from", "that", "year", "."], ["We", "have", "therefore", "taken", "the", "initiative", "to", "convene", "the", "first", "international", "open", "meeting", "dedicated", "solely", "to", "rural", "history", "."], ["The", "timer", "of", "the", "device", "automatically", "eliminates", "wasted", "\"", "standby", "power", "\"", "consumption", "by", "automatically", "turn", "off", "electronics", "plugged", "into", "the", "\"", "auto", "off", "\"", "outlets", "."], ["Bob", "Parks", "made", "a", "similar", "offer", "in", "a", "phone", "call", "made", "earlier", "this", "week", "."], ["He", "had", "chest", "pains", "and", "headaches", "from", "mold", "in", "the", "bedrooms", "."], ["The", "silver-haired", "author", "was", "not", "just", "laying", "India", "'s", "politician", "saint", "to", "rest", "but", "healing", "a", "generations-old", "rift", "in", "the", "family", "of", "the", "country", "'", "s", "founding", "father", "."], ["It", "describes", "a", "method", "for", "loading", "a", "horizontal", "stack", "of", "containers", "into", "a", "carton", "."], ["The", "Foundation", "decided", "to", "repurpose", "the", "building", "in", "order", "to", "reduce", "wear", "and", "tear", "on", "the", "plumbing", "in", "the", "manor", "house", "by", "redirecting", "visitors", "during", "restoration", "projects", "and", "beyond", "."], ["The", "technology", "is", 
"available", "to", "produce", "and", "transmit", "electricity", "economically", "from", "OTEC", "systems", "."], ["The", "Medicare", "buy-in", "plan", "ran", "into", "Senate", "resistance", "."], ["The", "provinces", "are", "divided", "into", "counties", "(", "shahrestan", ")", ",", "and", "subdivided", "into", "districts", "(", "bakhsh", ")", "and", "sub-districts", "(", "dehestan", ")", "."], ["Financial", "stress", "is", "one", "of", "the", "main", "causes", "of", "divorce", "."], ["Newspapers", "swap", "content", "via", "widgets", "with", "the", "help", "of", "the", "newsgator", "service", "."], ["The", "women", "that", "caused", "the", "accident", "was", "on", "the", "cell", "phone", "and", "ran", "thru", "the", "intersection", "without", "pausing", "on", "the", "median", "."], ["The", "transmitter", "was", "discovered", "inside", "a", "bed", "settee", "suite", "on", "which", "he", "had", "been", "sitting", "."], ["The", "Kerala", "backwaters", "are", "a", "chain", "of", "brackish", "lagoons", "and", "lakes", "lying", "parallel", "to", "the", "Arabian", "Sea", "coast", "of", "Kerala", "state", "in", "southern", "India", "."], ["A", "St.", "Paul", "College", "student", "was", "released", "from", "jail", "Wednesday", "night", ",", "after", "his", "arrest", "Tuesday", "in", "the", "alleged", "rape", "of", "another", "student", "on", "campus", "."], ["Calluses", "are", "caused", "by", "improperly", "fitting", "shoes", "or", "by", "a", "skin", "abnormality", "."], ["Adults", "use", "drugs", "for", "this", "purpose", "."], ["The", "councilor", "proposed", "assessing", "infinitival", "complements", "through", "elicitation", "."], ["As", "in", "the", "popular", "movie", "\"", "Deep", "Impact", "\"", ",", "the", "action", "of", "the", "Perseid", "meteor", "shower", "is", "caused", "by", "a", "comet", ",", "in", "this", "case", "periodic", "comet", "Swift-Tuttle", "."], ["The", "following", "information", "appeared", "in", "the", "notes", "to", "consolidated", 
"financial", "statements", "of", "some", "corporate", "annual", "reports", "."], ["HipHop", "appropriates", "the", "symbols", "of", "a", "consumer", "society", ":", "oversized", "diamond", "colliers", "are", "worn", "."], ["The", "radiation", "from", "the", "atomic", "bomb", "explosion", "is", "a", "typical", "acute", "radiation", "."], ["The", "ride-on", "boat", "tiller", "was", "developed", "by", "engineers", "Arnold", "S.", "Juliano", "and", "Dr.", "Eulito", "U.", "Bautista", "."], ["A", "neoplastic", "recurrence", "arose", "from", "an", "extensive", "radiation", "induced", "ulceration", "."]], "entities": [["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Whole", "O", "O", "Component", "O"], ["O", "Orelation1", "O", "O", "O", "O", "O", "O", "O", "Orelation2", "O", "O", "O", "O", "O", "O"], ["O", "Agency", "O", "O", "O", "O", "O", "Instrument", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "Orelation1", "O", "O", "O", "Orelation2", "O"], ["O", "Member", "Collection", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "Orelation1", "O", "O", "O", "O", "O", "Orelation2", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "Effect", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Cause", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["Entity", "O", "O", "O", "O", "O", "Destination", "O"], ["O", "Content", "O", "O", "O", "O", "Container", "Container", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Entity", "O", "O", "O", "O", "O", "O", "O", "Destination", "O", "O", "O", "O"], ["O", "O", "Member", "O", "O", "O", "Collection", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "Orelation1", "O", "O", "O", "Orelation2", "O"], ["O", "O", "O", "O", "O", "O", "Message", "O", "O", "Topic", "O", "O", "O", "O"], ["O", "Effect", "O", "O", "O", "O", "O", "O", 
"Cause", "O"], ["O", "O", "Agency", "O", "O", "O", "Instrument", "Instrument", "O"], ["O", "O", "O", "O", "Message", "O", "O", "O", "O", "O", "Topic", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "Agency", "O", "O", "O", "O", "O", "O", "Instrument", "O"], ["O", "Producer", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Product", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "Whole", "Component", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Collection", "O", "O", "Member", "O"], ["O", "Entity", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Origin", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "Collection", "O", "O", "O", "O", "Member", "O"], ["O", "Cause", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Effect", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "Orelation1", "O", "O", "O", "O", "O", "Orelation2", "O"], ["O", "O", "O", "O", "O", "O", "O", "Collection", "O", "O", "Member", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "Orelation1", "O", "O", "O", "O", "Orelation2", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["Cause", "O", "O", "O", "O", "O", "O", "O", "Effect", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "Message", "O", "O", "O", "O", "O", "Topic", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Message", "O", "O", "O", "Topic", "Topic", "O"], ["O", "Component", "O", "O", "Whole", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", 
"O", "Topic", "O", "O", "Message", "Message", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "Effect", "O", "Cause", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Product", "O", "O", "O", "Producer", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "Entity", "O", "O", "O", "O", "Destination", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Component", "O", "O", "Whole", "Whole", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "Entity", "O", "O", "O", "Origin", "O"], ["O", "O", "O", "Orelation1", "O", "O", "O", "Orelation2", "O"], ["O", "Whole", "O", "O", "O", "Component", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "Cause", "O", "O", "O", "O", "O", "O", "O", "Effect", "O"], ["Agency", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Instrument", "O"], ["O", "Cause", "O", "O", "O", "Effect", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "Content", "O", "O", "O", "O", "O", "O", "Container", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "Collection", "O", "O", "Member", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "Entity", "O", "O", "O", "Origin", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["Effect", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Cause", "Cause", "O"], ["Agency", "O", "Instrument", "O", "O", "O", "O"], ["O", "Agency", "O", "O", "O", "O", "O", "Instrument", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "Effect", "Effect", "O", "O", "O", "O", "Cause", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "Orelation1", "O", "O", "O", "Orelation2", "O", "O", "O", "O", "O", "O"], ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", 
"Orelation1", "Orelation2", "O", "O", "O"], ["O", "Effect", "O", "O", "O", "Cause", "Cause", "O", "O", "O", "O", "O", "O"], ["O", "O", "Whole", "Component", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"], ["O", "O", "Effect", "O", "O", "O", "O", "Cause", "O", "O", "O"]]}, "relations": ["Component-Whole", "Other", "Instrument-Agency", "Entity-Destination", "Member-Collection", "Product-Producer", "Cause-Effect", "Product-Producer", "Content-Container", "Entity-Destination", "Member-Collection", "Other", "Message-Topic", "Cause-Effect", "Instrument-Agency", "Message-Topic", "Instrument-Agency", "Product-Producer", "Other", "Member-Collection", "Other", "Member-Collection", "Cause-Effect", "Other", "Member-Collection", "Other", "Other", "Message-Topic", "Message-Topic", "Component-Whole", "Message-Topic", "Cause-Effect", "Product-Producer", "Content-Container", "Component-Whole", "Entity-Origin", "Other", "Component-Whole", "Cause-Effect", "Instrument-Agency", "Cause-Effect", "Content-Container", "Member-Collection", "Entity-Origin", "Other", "Product-Producer", "Instrument-Agency", "Cause-Effect", "Other", "Other", "Cause-Effect", "Component-Whole", "Cause-Effect"]}
src/graph/out_file.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"text": "CAMARA DE COMERCIO DE BOGOTA SUPERCADE AMERICAS 21 DE NOVIEMBRE DE 2011 CAMARA DE COMEROOO DE BOGOTA CERTIFICADO DE MATRICULA DE PERSONA NATURAL LA CAMARA DE COMERCIO DE BOGOTA, CON FUNDAMENTO EN LAS MATRICULAS E INSCRIPCIONES DEL REGISTRO MERCANTIL CERTIEICA: NOMBRE RODRIGUEZ MURCIA JULIO C.C 194459 NIT: 194459-9 CERTIFICA: MATRICULA NO 00784836 DEL 23 DE ABRIL DE 1997 CERTIEICA DIRECCION DE NO IFICACION JUDICIAL : CL 54 SUR No 87G-02 MUNICIPIO BOGOTA D.C. EMAIL NOTIFICAC\u0130ON JUDICIAL : CARPINTERIAMETALICAS RODRIGUEZ@HOTMAIL.CO DIRECCION COMERCIAL DG 54 NO 87G-12 MUNICIPIO : BOGOTA D.C EMAIL COMERCIAL: CARPINTERIAMETALICASRODRIGUEZCHOTMAIL.CO *ADVERTENCIA ESTOS DATOS CORRESPONDEN A LA ULTIMA INFORMACION ** SUMINISTRADA POR EL COMERCIANTE EN EL FORMULARIO DE MATRICULA* Y/O RENOVACION DEL A\u00d1O 2010 CERTIEICA: QUE EL COMERCIANTE NO HA CUMPLIDO CON LA OBLIGACION LEGAL DE RENOVAR SU MATRICULA MERCANTIL DESDE 2011 CERTIFICA RENOVACION DE LA MATR\u00cdCULA :EL 15 DE JUNIO DE 2010 ULTIMO A\u00d1O RENOVADO2010 TOTAL ACTIVOS TOTAL ACTIVOS SIN AJUSTES POR INFLACION 95,000,000 ACTIVIDAD ECONOMICA FABRICACION ELEMENTOS METALICOS, ORNAMENTACION PUERTAS, VENTANAS, REJAS Y VERJAS / ALQUILER DE VEHICULOS DE CARGA CON CONDUCTOR : 120,000,000 CERTIFICA: PROPIETARIO DE LOS SIGUIENTES ESTABLECIMIENTOS DE Comercio NOMBRE CARPINTERIA METALICA RODRIGUEZ DIRECCION COMERCIAL : DG 54 SUR No 87G-02 MUNICIPIO : BOGOTA D.C MATRICULA NO 00784839 DE 23 DE ABRIL DE 1997 RENOVACION DE LA MATRICULA EL 15 DE JUNIO DE 2010 ULTIMO A\u00d1O RENOVADO 2010 CERTIFICA LA INFORMACION ANTERIOR HA SIDO TOMADA DIRECTAMENTE DEL FORMULARIO DE MATRICULA DILIGENCIADO POR EL COMERCIANTE DE CONFORMIDAD CON LO ESTABLECIDO POR LA LEY 962 DE 2005, LOS ACTOS DE REGISTRO-AQUI-CERTIFICADOS-'QUEDAN EN-FIRME, CINCO-H DIAS-HABI DESPUES DE LA FECHA DE INSCRIPCION,SIEMPRE QUE NO SEAN OBJETO DE RECURSOS EN LA VIA GUBERNAT\u012aVA **EL PRESENTE CERTIFICADO NO CONSTITUYE PERMISO DE FUNCIONAMIENTO EN NINGUN 
CASO SE\u00d1OR EMPRESARIO, SI SU EMPRESA TIENE ACTIVOS INFERIORES A 30.000 SMLMV Y UNA PLANTA DE PERSONAL DE MENOS DE 200 TRABAJADORES, USTED TIENE DERECHO A RECIBIR UN DESCUENTO EN EL PAGO DE LOS PARAFISCALES DE 75% EN EL PRIMER A\u00d1O DE CONSTITUCION DE SU EMPRESA, DE 50% EN EL SEGUNDO A\u00d1O Y DE 25% EN EL TERCER A\u00d1O.LEY 590 DE 2000 Y DECRETO 525 DE 2009.EL SECRETARIO DE LA CAMARA DE COMERCIO, VALOR $ 1,900 DE CONFORMIDAD CON EL DECRETO 2150 DE 1995 Y LA AUTORIZACION IMPARTIDA POR LA SUPERINTENDENCIA DE INDUSTRIA Y COMERCIO, MEDIANTE EL OFICIO DEL 18 DE NO IEMBRE DE 1996, LA FIRMA MECANICA QUE APARECE A CONTINUACION TIENE PLENA VALIDEZ PARA TODOS LOS EFECTOS LEGALES \"", "text_labeled": "CAMARA DE COMERCIO DE BOGOTA SUPERCADE AMERICAS \"21\"/FCH \"DE\"/FCH \"NOVIEMBRE\"/FCH \"DE\"/FCH \"2011\"/FCH CAMARA DE COMEROOO DE BOGOTA CERTIFICADO DE MATRICULA DE PERSONA NATURAL LA CAMARA DE COMERCIO DE BOGOTA , CON FUNDAMENTO EN LAS MATRICULAS E INSCRIPCIONES DEL REGISTRO MERCANTIL CERTIEICA : NOMBRE \"RODRIGUEZ\"/PER \"MURCIA\"/PER \"JULIO\"/PER \"C.C\"/TDID \"194459\"/DID \"NIT\"/TDID : \"194459-9\"/DID CERTIFICA : \"MATRICULA\"/TFMT NO 00784836 DEL \"23\"/FCH \"DE\"/FCH \"ABRIL\"/FCH \"DE\"/FCH \"1997\"/FCH CERTIEICA DIRECCION DE NO IFICACION JUDICIAL : CL 54 SUR No 87G-02 MUNICIPIO BOGOTA D.C . 
EMAIL NOTIFICAC\u0130ON JUDICIAL : CARPINTERIAMETALICAS RODRIGUEZ @ HOTMAIL.CO DIRECCION COMERCIAL DG 54 NO 87G-12 MUNICIPIO : BOGOTA D.C EMAIL COMERCIAL : CARPINTERIAMETALICASRODRIGUEZCHOTMAIL.CO * ADVERTENCIA ESTOS DATOS CORRESPONDEN A LA ULTIMA INFORMACION ** SUMINISTRADA POR EL COMERCIANTE EN EL FORMULARIO DE MATRICULA * Y / O RENOVACION DEL A\u00d1O 2010 CERTIEICA : QUE EL COMERCIANTE NO HA CUMPLIDO CON LA OBLIGACION LEGAL DE RENOVAR SU MATRICULA MERCANTIL DESDE 2011 CERTIFICA RENOVACION DE LA MATR\u00cdCULA :EL \"15\"/FCH \"DE\"/FCH \"JUNIO\"/FCH \"DE\"/FCH \"2010\"/FCH ULTIMO A\u00d1O RENOVADO2010 TOTAL ACTIVOS TOTAL ACTIVOS SIN AJUSTES POR INFLACION 95,000,000 \"ACTIVIDAD\"/ACT \"ECONOMICA\"/ACT \"FABRICACION\"/ACT \"ELEMENTOS\"/ACT \"METALICOS\"/ACT \",\"/ACT \"ORNAMENTACION\"/ACT \"PUERTAS\"/ACT \",\"/ACT \"VENTANAS\"/ACT \",\"/ACT \"REJAS\"/ACT \"Y\"/ACT \"VERJAS\"/ACT \"/\"/ACT \"ALQUILER\"/ACT \"DE\"/ACT \"VEHICULOS\"/ACT \"DE\"/ACT \"CARGA\"/ACT \"CON\"/ACT \"CONDUCTOR\"/ACT : 120,000,000 CERTIFICA : PROPIETARIO DE LOS SIGUIENTES \"ESTABLECIMIENTOS\"/TNRS \"DE\"/TNRS Comercio \"NOMBRE\"/TNRS \"CARPINTERIA\"/ORG \"METALICA\"/ORG \"RODRIGUEZ\"/ORG DIRECCION COMERCIAL : DG 54 SUR No 87G-02 MUNICIPIO : BOGOTA D.C \"MATRICULA\"/TFMT NO 00784839 DE \"23\"/FCH \"DE\"/FCH \"ABRIL\"/FCH \"DE\"/FCH \"1997\"/FCH RENOVACION DE LA MATRICULA EL \"15\"/FCH \"DE\"/FCH \"JUNIO\"/FCH \"DE\"/FCH \"2010\"/FCH ULTIMO A\u00d1O RENOVADO 2010 CERTIFICA LA INFORMACION ANTERIOR HA SIDO TOMADA DIRECTAMENTE DEL FORMULARIO DE MATRICULA DILIGENCIADO POR EL COMERCIANTE DE CONFORMIDAD CON LO ESTABLECIDO POR LA LEY 962 DE 2005 , LOS ACTOS DE REGISTRO-AQUI-CERTIFICADOS -' QUEDAN EN-FIRME , CINCO-H DIAS-HABI DESPUES DE LA FECHA DE INSCRIPCION ,SIEMPRE QUE NO SEAN OBJETO DE RECURSOS EN LA VIA GUBERNAT\u012aVA ** EL PRESENTE CERTIFICADO NO CONSTITUYE PERMISO DE FUNCIONAMIENTO EN NINGUN CASO SE\u00d1OR EMPRESARIO , SI SU EMPRESA TIENE ACTIVOS INFERIORES A 30.000 SMLMV Y UNA PLANTA DE 
PERSONAL DE MENOS DE 200 TRABAJADORES , USTED TIENE DERECHO A RECIBIR UN DESCUENTO EN EL PAGO DE LOS PARAFISCALES DE 75 % EN EL PRIMER A\u00d1O DE CONSTITUCION DE SU EMPRESA , DE 50 % EN EL SEGUNDO A\u00d1O Y DE 25 % EN EL TERCER A\u00d1O .LEY 590 DE 2000 Y DECRETO 525 DE 2009 .EL SECRETARIO DE LA CAMARA DE COMERCIO , VALOR $ 1,900 DE CONFORMIDAD CON EL DECRETO 2150 DE 1995 Y LA AUTORIZACION IMPARTIDA POR LA SUPERINTENDENCIA DE INDUSTRIA Y COMERCIO , MEDIANTE EL OFICIO DEL \"18\"/FCH \"DE\"/FCH \"NO\"/FCH \"IEMBRE\"/FCH \"DE\"/FCH \"1996\"/FCH , LA FIRMA MECANICA QUE APARECE A CONTINUACION TIENE PLENA VALIDEZ PARA TODOS LOS EFECTOS LEGALES \"", "sentences": [{"text": "CAMARA DE COMERCIO DE BOGOTA SUPERCADE AMERICAS 21 DE NOVIEMBRE DE 2011 CAMARA DE COMEROOO DE BOGOTA CERTIFICADO DE MATRICULA DE PERSONA NATURAL LA CAMARA DE COMERCIO DE BOGOTA, CON FUNDAMENTO EN LAS MATRICULAS E INSCRIPCIONES DEL REGISTRO MERCANTIL CERTIEICA: NOMBRE RODRIGUEZ MURCIA JULIO C.C 194459 NIT: 194459-9 CERTIFICA: MATRICULA NO 00784836 DEL 23 DE ABRIL DE 1997 CERTIEICA DIRECCION DE NO IFICACION JUDICIAL : CL 54 SUR No 87G-02 MUNICIPIO BOGOTA D.C. 
EMAIL NOTIFICAC\u0130ON JUDICIAL : CARPINTERIAMETALICAS RODRIGUEZ@HOTMAIL.CO DIRECCION COMERCIAL DG 54 NO 87G-12 MUNICIPIO : BOGOTA D.C EMAIL COMERCIAL: CARPINTERIAMETALICASRODRIGUEZCHOTMAIL.CO *ADVERTENCIA ESTOS DATOS CORRESPONDEN A LA ULTIMA INFORMACION ** SUMINISTRADA POR EL COMERCIANTE EN EL FORMULARIO DE MATRICULA* Y/O RENOVACION DEL A\u00d1O 2010 CERTIEICA: QUE EL COMERCIANTE NO HA CUMPLIDO CON LA OBLIGACION LEGAL DE RENOVAR SU MATRICULA MERCANTIL DESDE 2011 CERTIFICA RENOVACION DE LA MATR\u00cdCULA :", "text_labeled": "CAMARA DE COMERCIO DE BOGOTA SUPERCADE AMERICAS \"21\"/FCH \"DE\"/FCH \"NOVIEMBRE\"/FCH \"DE\"/FCH \"2011\"/FCH CAMARA DE COMEROOO DE BOGOTA CERTIFICADO DE MATRICULA DE PERSONA NATURAL LA CAMARA DE COMERCIO DE BOGOTA , CON FUNDAMENTO EN LAS MATRICULAS E INSCRIPCIONES DEL REGISTRO MERCANTIL CERTIEICA : NOMBRE \"RODRIGUEZ\"/PER \"MURCIA\"/PER \"JULIO\"/PER \"C.C\"/TDID \"194459\"/DID \"NIT\"/TDID : \"194459-9\"/DID CERTIFICA : \"MATRICULA\"/TFMT NO 00784836 DEL \"23\"/FCH \"DE\"/FCH \"ABRIL\"/FCH \"DE\"/FCH \"1997\"/FCH CERTIEICA DIRECCION DE NO IFICACION JUDICIAL : CL 54 SUR No 87G-02 MUNICIPIO BOGOTA D.C . 
EMAIL NOTIFICAC\u0130ON JUDICIAL : CARPINTERIAMETALICAS RODRIGUEZ @ HOTMAIL.CO DIRECCION COMERCIAL DG 54 NO 87G-12 MUNICIPIO : BOGOTA D.C EMAIL COMERCIAL : CARPINTERIAMETALICASRODRIGUEZCHOTMAIL.CO * ADVERTENCIA ESTOS DATOS CORRESPONDEN A LA ULTIMA INFORMACION ** SUMINISTRADA POR EL COMERCIANTE EN EL FORMULARIO DE MATRICULA * Y / O RENOVACION DEL A\u00d1O 2010 CERTIEICA : QUE EL COMERCIANTE NO HA CUMPLIDO CON LA OBLIGACION LEGAL DE RENOVAR SU MATRICULA MERCANTIL DESDE 2011 CERTIFICA RENOVACION DE LA MATR\u00cdCULA :", "tokens": [{"text": "CAMARA", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "COMERCIO", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "BOGOTA", "label": "O"}, {"text": "SUPERCADE", "label": "O"}, {"text": "AMERICAS", "label": "O"}, {"text": "21", "label": "FCH"}, {"text": "DE", "label": "FCH"}, {"text": "NOVIEMBRE", "label": "FCH"}, {"text": "DE", "label": "FCH"}, {"text": "2011", "label": "FCH"}, {"text": "CAMARA", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "COMEROOO", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "BOGOTA", "label": "O"}, {"text": "CERTIFICADO", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "MATRICULA", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "PERSONA", "label": "O"}, {"text": "NATURAL", "label": "O"}, {"text": "LA", "label": "O"}, {"text": "CAMARA", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "COMERCIO", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "BOGOTA", "label": "O"}, {"text": ",", "label": "O"}, {"text": "CON", "label": "O"}, {"text": "FUNDAMENTO", "label": "O"}, {"text": "EN", "label": "O"}, {"text": "LAS", "label": "O"}, {"text": "MATRICULAS", "label": "O"}, {"text": "E", "label": "O"}, {"text": "INSCRIPCIONES", "label": "O"}, {"text": "DEL", "label": "O"}, {"text": "REGISTRO", "label": "O"}, {"text": "MERCANTIL", "label": "O"}, {"text": "CERTIEICA", "label": "O"}, {"text": ":", "label": "O"}, {"text": "NOMBRE", "label": "O"}, {"text": "RODRIGUEZ", 
"label": "PER"}, {"text": "MURCIA", "label": "PER"}, {"text": "JULIO", "label": "PER"}, {"text": "C.C", "label": "TDID"}, {"text": "194459", "label": "DID"}, {"text": "NIT", "label": "TDID"}, {"text": ":", "label": "O"}, {"text": "194459-9", "label": "DID"}, {"text": "CERTIFICA", "label": "O"}, {"text": ":", "label": "O"}, {"text": "MATRICULA", "label": "TFMT"}, {"text": "NO", "label": "O"}, {"text": "00784836", "label": "O"}, {"text": "DEL", "label": "O"}, {"text": "23", "label": "FCH"}, {"text": "DE", "label": "FCH"}, {"text": "ABRIL", "label": "FCH"}, {"text": "DE", "label": "FCH"}, {"text": "1997", "label": "FCH"}, {"text": "CERTIEICA", "label": "O"}, {"text": "DIRECCION", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "NO", "label": "O"}, {"text": "IFICACION", "label": "O"}, {"text": "JUDICIAL", "label": "O"}, {"text": ":", "label": "O"}, {"text": "CL", "label": "O"}, {"text": "54", "label": "O"}, {"text": "SUR", "label": "O"}, {"text": "No", "label": "O"}, {"text": "87G-02", "label": "O"}, {"text": "MUNICIPIO", "label": "O"}, {"text": "BOGOTA", "label": "O"}, {"text": "D.C", "label": "O"}, {"text": ".", "label": "O"}, {"text": "EMAIL", "label": "O"}, {"text": "NOTIFICAC\u0130ON", "label": "O"}, {"text": "JUDICIAL", "label": "O"}, {"text": ":", "label": "O"}, {"text": "CARPINTERIAMETALICAS", "label": "O"}, {"text": "RODRIGUEZ", "label": "O"}, {"text": "@", "label": "O"}, {"text": "HOTMAIL.CO", "label": "O"}, {"text": "DIRECCION", "label": "O"}, {"text": "COMERCIAL", "label": "O"}, {"text": "DG", "label": "O"}, {"text": "54", "label": "O"}, {"text": "NO", "label": "O"}, {"text": "87G-12", "label": "O"}, {"text": "MUNICIPIO", "label": "O"}, {"text": ":", "label": "O"}, {"text": "BOGOTA", "label": "O"}, {"text": "D.C", "label": "O"}, {"text": "EMAIL", "label": "O"}, {"text": "COMERCIAL", "label": "O"}, {"text": ":", "label": "O"}, {"text": "CARPINTERIAMETALICASRODRIGUEZCHOTMAIL.CO", "label": "O"}, {"text": "*", "label": "O"}, {"text": "ADVERTENCIA", 
"label": "O"}, {"text": "ESTOS", "label": "O"}, {"text": "DATOS", "label": "O"}, {"text": "CORRESPONDEN", "label": "O"}, {"text": "A", "label": "O"}, {"text": "LA", "label": "O"}, {"text": "ULTIMA", "label": "O"}, {"text": "INFORMACION", "label": "O"}, {"text": "**", "label": "O"}, {"text": "SUMINISTRADA", "label": "O"}, {"text": "POR", "label": "O"}, {"text": "EL", "label": "O"}, {"text": "COMERCIANTE", "label": "O"}, {"text": "EN", "label": "O"}, {"text": "EL", "label": "O"}, {"text": "FORMULARIO", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "MATRICULA", "label": "O"}, {"text": "*", "label": "O"}, {"text": "Y", "label": "O"}, {"text": "/", "label": "O"}, {"text": "O", "label": "O"}, {"text": "RENOVACION", "label": "O"}, {"text": "DEL", "label": "O"}, {"text": "A\u00d1O", "label": "O"}, {"text": "2010", "label": "O"}, {"text": "CERTIEICA", "label": "O"}, {"text": ":", "label": "O"}, {"text": "QUE", "label": "O"}, {"text": "EL", "label": "O"}, {"text": "COMERCIANTE", "label": "O"}, {"text": "NO", "label": "O"}, {"text": "HA", "label": "O"}, {"text": "CUMPLIDO", "label": "O"}, {"text": "CON", "label": "O"}, {"text": "LA", "label": "O"}, {"text": "OBLIGACION", "label": "O"}, {"text": "LEGAL", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "RENOVAR", "label": "O"}, {"text": "SU", "label": "O"}, {"text": "MATRICULA", "label": "O"}, {"text": "MERCANTIL", "label": "O"}, {"text": "DESDE", "label": "O"}, {"text": "2011", "label": "O"}, {"text": "CERTIFICA", "label": "O"}, {"text": "RENOVACION", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "LA", "label": "O"}, {"text": "MATR\u00cdCULA", "label": "O"}, {"text": ":", "label": "O"}]}, {"text": "EL 15 DE JUNIO DE 2010 ULTIMO A\u00d1O RENOVADO2010 TOTAL ACTIVOS TOTAL ACTIVOS SIN AJUSTES POR INFLACION 95,000,000 ACTIVIDAD ECONOMICA FABRICACION ELEMENTOS METALICOS, ORNAMENTACION PUERTAS, VENTANAS, REJAS Y VERJAS / ALQUILER DE VEHICULOS DE CARGA CON CONDUCTOR : 120,000,000 CERTIFICA: PROPIETARIO DE LOS 
SIGUIENTES ESTABLECIMIENTOS DE Comercio NOMBRE CARPINTERIA METALICA RODRIGUEZ DIRECCION COMERCIAL : DG 54 SUR No 87G-02 MUNICIPIO : BOGOTA D.C MATRICULA NO 00784839 DE 23 DE ABRIL DE 1997 RENOVACION DE LA MATRICULA EL 15 DE JUNIO DE 2010 ULTIMO A\u00d1O RENOVADO 2010 CERTIFICA LA INFORMACION ANTERIOR HA SIDO TOMADA DIRECTAMENTE DEL FORMULARIO DE MATRICULA DILIGENCIADO POR EL COMERCIANTE DE CONFORMIDAD CON LO ESTABLECIDO POR LA LEY 962 DE 2005, LOS ACTOS DE REGISTRO-AQUI-CERTIFICADOS-'QUEDAN EN-FIRME, CINCO-H DIAS-HABI DESPUES DE LA FECHA DE INSCRIPCION,", "text_labeled": "EL \"15\"/FCH \"DE\"/FCH \"JUNIO\"/FCH \"DE\"/FCH \"2010\"/FCH ULTIMO A\u00d1O RENOVADO2010 TOTAL ACTIVOS TOTAL ACTIVOS SIN AJUSTES POR INFLACION 95,000,000 \"ACTIVIDAD\"/ACT \"ECONOMICA\"/ACT \"FABRICACION\"/ACT \"ELEMENTOS\"/ACT \"METALICOS\"/ACT \",\"/ACT \"ORNAMENTACION\"/ACT \"PUERTAS\"/ACT \",\"/ACT \"VENTANAS\"/ACT \",\"/ACT \"REJAS\"/ACT \"Y\"/ACT \"VERJAS\"/ACT \"/\"/ACT \"ALQUILER\"/ACT \"DE\"/ACT \"VEHICULOS\"/ACT \"DE\"/ACT \"CARGA\"/ACT \"CON\"/ACT \"CONDUCTOR\"/ACT : 120,000,000 CERTIFICA : PROPIETARIO DE LOS SIGUIENTES \"ESTABLECIMIENTOS\"/TNRS \"DE\"/TNRS Comercio \"NOMBRE\"/TNRS \"CARPINTERIA\"/ORG \"METALICA\"/ORG \"RODRIGUEZ\"/ORG DIRECCION COMERCIAL : DG 54 SUR No 87G-02 MUNICIPIO : BOGOTA D.C \"MATRICULA\"/TFMT NO 00784839 DE \"23\"/FCH \"DE\"/FCH \"ABRIL\"/FCH \"DE\"/FCH \"1997\"/FCH RENOVACION DE LA MATRICULA EL \"15\"/FCH \"DE\"/FCH \"JUNIO\"/FCH \"DE\"/FCH \"2010\"/FCH ULTIMO A\u00d1O RENOVADO 2010 CERTIFICA LA INFORMACION ANTERIOR HA SIDO TOMADA DIRECTAMENTE DEL FORMULARIO DE MATRICULA DILIGENCIADO POR EL COMERCIANTE DE CONFORMIDAD CON LO ESTABLECIDO POR LA LEY 962 DE 2005 , LOS ACTOS DE REGISTRO-AQUI-CERTIFICADOS -' QUEDAN EN-FIRME , CINCO-H DIAS-HABI DESPUES DE LA FECHA DE INSCRIPCION ,", "tokens": [{"text": "EL", "label": "O"}, {"text": "15", "label": "FCH"}, {"text": "DE", "label": "FCH"}, {"text": "JUNIO", "label": "FCH"}, {"text": "DE", "label": "FCH"}, {"text": 
"2010", "label": "FCH"}, {"text": "ULTIMO", "label": "O"}, {"text": "A\u00d1O", "label": "O"}, {"text": "RENOVADO2010", "label": "O"}, {"text": "TOTAL", "label": "O"}, {"text": "ACTIVOS", "label": "O"}, {"text": "TOTAL", "label": "O"}, {"text": "ACTIVOS", "label": "O"}, {"text": "SIN", "label": "O"}, {"text": "AJUSTES", "label": "O"}, {"text": "POR", "label": "O"}, {"text": "INFLACION", "label": "O"}, {"text": "95,000,000", "label": "O"}, {"text": "ACTIVIDAD", "label": "ACT"}, {"text": "ECONOMICA", "label": "ACT"}, {"text": "FABRICACION", "label": "ACT"}, {"text": "ELEMENTOS", "label": "ACT"}, {"text": "METALICOS", "label": "ACT"}, {"text": ",", "label": "ACT"}, {"text": "ORNAMENTACION", "label": "ACT"}, {"text": "PUERTAS", "label": "ACT"}, {"text": ",", "label": "ACT"}, {"text": "VENTANAS", "label": "ACT"}, {"text": ",", "label": "ACT"}, {"text": "REJAS", "label": "ACT"}, {"text": "Y", "label": "ACT"}, {"text": "VERJAS", "label": "ACT"}, {"text": "/", "label": "ACT"}, {"text": "ALQUILER", "label": "ACT"}, {"text": "DE", "label": "ACT"}, {"text": "VEHICULOS", "label": "ACT"}, {"text": "DE", "label": "ACT"}, {"text": "CARGA", "label": "ACT"}, {"text": "CON", "label": "ACT"}, {"text": "CONDUCTOR", "label": "ACT"}, {"text": ":", "label": "O"}, {"text": "120,000,000", "label": "O"}, {"text": "CERTIFICA", "label": "O"}, {"text": ":", "label": "O"}, {"text": "PROPIETARIO", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "LOS", "label": "O"}, {"text": "SIGUIENTES", "label": "O"}, {"text": "ESTABLECIMIENTOS", "label": "TNRS"}, {"text": "DE", "label": "TNRS"}, {"text": "Comercio", "label": "O"}, {"text": "NOMBRE", "label": "TNRS"}, {"text": "CARPINTERIA", "label": "ORG"}, {"text": "METALICA", "label": "ORG"}, {"text": "RODRIGUEZ", "label": "ORG"}, {"text": "DIRECCION", "label": "O"}, {"text": "COMERCIAL", "label": "O"}, {"text": ":", "label": "O"}, {"text": "DG", "label": "O"}, {"text": "54", "label": "O"}, {"text": "SUR", "label": "O"}, {"text": "No", "label": "O"}, 
{"text": "87G-02", "label": "O"}, {"text": "MUNICIPIO", "label": "O"}, {"text": ":", "label": "O"}, {"text": "BOGOTA", "label": "O"}, {"text": "D.C", "label": "O"}, {"text": "MATRICULA", "label": "TFMT"}, {"text": "NO", "label": "O"}, {"text": "00784839", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "23", "label": "FCH"}, {"text": "DE", "label": "FCH"}, {"text": "ABRIL", "label": "FCH"}, {"text": "DE", "label": "FCH"}, {"text": "1997", "label": "FCH"}, {"text": "RENOVACION", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "LA", "label": "O"}, {"text": "MATRICULA", "label": "O"}, {"text": "EL", "label": "O"}, {"text": "15", "label": "FCH"}, {"text": "DE", "label": "FCH"}, {"text": "JUNIO", "label": "FCH"}, {"text": "DE", "label": "FCH"}, {"text": "2010", "label": "FCH"}, {"text": "ULTIMO", "label": "O"}, {"text": "A\u00d1O", "label": "O"}, {"text": "RENOVADO", "label": "O"}, {"text": "2010", "label": "O"}, {"text": "CERTIFICA", "label": "O"}, {"text": "LA", "label": "O"}, {"text": "INFORMACION", "label": "O"}, {"text": "ANTERIOR", "label": "O"}, {"text": "HA", "label": "O"}, {"text": "SIDO", "label": "O"}, {"text": "TOMADA", "label": "O"}, {"text": "DIRECTAMENTE", "label": "O"}, {"text": "DEL", "label": "O"}, {"text": "FORMULARIO", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "MATRICULA", "label": "O"}, {"text": "DILIGENCIADO", "label": "O"}, {"text": "POR", "label": "O"}, {"text": "EL", "label": "O"}, {"text": "COMERCIANTE", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "CONFORMIDAD", "label": "O"}, {"text": "CON", "label": "O"}, {"text": "LO", "label": "O"}, {"text": "ESTABLECIDO", "label": "O"}, {"text": "POR", "label": "O"}, {"text": "LA", "label": "O"}, {"text": "LEY", "label": "O"}, {"text": "962", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "2005", "label": "O"}, {"text": ",", "label": "O"}, {"text": "LOS", "label": "O"}, {"text": "ACTOS", "label": "O"}, {"text": "DE", "label": "O"}, {"text": 
"REGISTRO-AQUI-CERTIFICADOS", "label": "O"}, {"text": "-'", "label": "O"}, {"text": "QUEDAN", "label": "O"}, {"text": "EN-FIRME", "label": "O"}, {"text": ",", "label": "O"}, {"text": "CINCO-H", "label": "O"}, {"text": "DIAS-HABI", "label": "O"}, {"text": "DESPUES", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "LA", "label": "O"}, {"text": "FECHA", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "INSCRIPCION", "label": "O"}, {"text": ",", "label": "O"}]}, {"text": "SIEMPRE QUE NO SEAN OBJETO DE RECURSOS EN LA VIA GUBERNAT\u012aVA **EL PRESENTE CERTIFICADO NO CONSTITUYE PERMISO DE FUNCIONAMIENTO EN NINGUN CASO SE\u00d1OR EMPRESARIO, SI SU EMPRESA TIENE ACTIVOS INFERIORES A 30.000 SMLMV Y UNA PLANTA DE PERSONAL DE MENOS DE 200 TRABAJADORES, USTED TIENE DERECHO A RECIBIR UN DESCUENTO EN EL PAGO DE LOS PARAFISCALES DE 75% EN EL PRIMER A\u00d1O DE CONSTITUCION DE SU EMPRESA, DE 50% EN EL SEGUNDO A\u00d1O Y DE 25% EN EL TERCER A\u00d1O.", "text_labeled": "SIEMPRE QUE NO SEAN OBJETO DE RECURSOS EN LA VIA GUBERNAT\u012aVA ** EL PRESENTE CERTIFICADO NO CONSTITUYE PERMISO DE FUNCIONAMIENTO EN NINGUN CASO SE\u00d1OR EMPRESARIO , SI SU EMPRESA TIENE ACTIVOS INFERIORES A 30.000 SMLMV Y UNA PLANTA DE PERSONAL DE MENOS DE 200 TRABAJADORES , USTED TIENE DERECHO A RECIBIR UN DESCUENTO EN EL PAGO DE LOS PARAFISCALES DE 75 % EN EL PRIMER A\u00d1O DE CONSTITUCION DE SU EMPRESA , DE 50 % EN EL SEGUNDO A\u00d1O Y DE 25 % EN EL TERCER A\u00d1O .", "tokens": [{"text": "SIEMPRE", "label": "O"}, {"text": "QUE", "label": "O"}, {"text": "NO", "label": "O"}, {"text": "SEAN", "label": "O"}, {"text": "OBJETO", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "RECURSOS", "label": "O"}, {"text": "EN", "label": "O"}, {"text": "LA", "label": "O"}, {"text": "VIA", "label": "O"}, {"text": "GUBERNAT\u012aVA", "label": "O"}, {"text": "**", "label": "O"}, {"text": "EL", "label": "O"}, {"text": "PRESENTE", "label": "O"}, {"text": "CERTIFICADO", "label": "O"}, {"text": "NO", "label": 
"O"}, {"text": "CONSTITUYE", "label": "O"}, {"text": "PERMISO", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "FUNCIONAMIENTO", "label": "O"}, {"text": "EN", "label": "O"}, {"text": "NINGUN", "label": "O"}, {"text": "CASO", "label": "O"}, {"text": "SE\u00d1OR", "label": "O"}, {"text": "EMPRESARIO", "label": "O"}, {"text": ",", "label": "O"}, {"text": "SI", "label": "O"}, {"text": "SU", "label": "O"}, {"text": "EMPRESA", "label": "O"}, {"text": "TIENE", "label": "O"}, {"text": "ACTIVOS", "label": "O"}, {"text": "INFERIORES", "label": "O"}, {"text": "A", "label": "O"}, {"text": "30.000", "label": "O"}, {"text": "SMLMV", "label": "O"}, {"text": "Y", "label": "O"}, {"text": "UNA", "label": "O"}, {"text": "PLANTA", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "PERSONAL", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "MENOS", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "200", "label": "O"}, {"text": "TRABAJADORES", "label": "O"}, {"text": ",", "label": "O"}, {"text": "USTED", "label": "O"}, {"text": "TIENE", "label": "O"}, {"text": "DERECHO", "label": "O"}, {"text": "A", "label": "O"}, {"text": "RECIBIR", "label": "O"}, {"text": "UN", "label": "O"}, {"text": "DESCUENTO", "label": "O"}, {"text": "EN", "label": "O"}, {"text": "EL", "label": "O"}, {"text": "PAGO", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "LOS", "label": "O"}, {"text": "PARAFISCALES", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "75", "label": "O"}, {"text": "%", "label": "O"}, {"text": "EN", "label": "O"}, {"text": "EL", "label": "O"}, {"text": "PRIMER", "label": "O"}, {"text": "A\u00d1O", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "CONSTITUCION", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "SU", "label": "O"}, {"text": "EMPRESA", "label": "O"}, {"text": ",", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "50", "label": "O"}, {"text": "%", "label": "O"}, {"text": "EN", "label": "O"}, {"text": "EL", "label": "O"}, {"text": 
"SEGUNDO", "label": "O"}, {"text": "A\u00d1O", "label": "O"}, {"text": "Y", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "25", "label": "O"}, {"text": "%", "label": "O"}, {"text": "EN", "label": "O"}, {"text": "EL", "label": "O"}, {"text": "TERCER", "label": "O"}, {"text": "A\u00d1O", "label": "O"}, {"text": ".", "label": "O"}]}, {"text": "LEY 590 DE 2000 Y DECRETO 525 DE 2009.", "text_labeled": "LEY 590 DE 2000 Y DECRETO 525 DE 2009 .", "tokens": [{"text": "LEY", "label": "O"}, {"text": "590", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "2000", "label": "O"}, {"text": "Y", "label": "O"}, {"text": "DECRETO", "label": "O"}, {"text": "525", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "2009", "label": "O"}, {"text": ".", "label": "O"}]}, {"text": "EL SECRETARIO DE LA CAMARA DE COMERCIO, VALOR $ 1,900 DE CONFORMIDAD CON EL DECRETO 2150 DE 1995 Y LA AUTORIZACION IMPARTIDA POR LA SUPERINTENDENCIA DE INDUSTRIA Y COMERCIO, MEDIANTE EL OFICIO DEL 18 DE NO IEMBRE DE 1996, LA FIRMA MECANICA QUE APARECE A CONTINUACION TIENE PLENA VALIDEZ PARA TODOS LOS EFECTOS LEGALES \"", "text_labeled": "EL SECRETARIO DE LA CAMARA DE COMERCIO , VALOR $ 1,900 DE CONFORMIDAD CON EL DECRETO 2150 DE 1995 Y LA AUTORIZACION IMPARTIDA POR LA SUPERINTENDENCIA DE INDUSTRIA Y COMERCIO , MEDIANTE EL OFICIO DEL \"18\"/FCH \"DE\"/FCH \"NO\"/FCH \"IEMBRE\"/FCH \"DE\"/FCH \"1996\"/FCH , LA FIRMA MECANICA QUE APARECE A CONTINUACION TIENE PLENA VALIDEZ PARA TODOS LOS EFECTOS LEGALES \"", "tokens": [{"text": "EL", "label": "O"}, {"text": "SECRETARIO", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "LA", "label": "O"}, {"text": "CAMARA", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "COMERCIO", "label": "O"}, {"text": ",", "label": "O"}, {"text": "VALOR", "label": "O"}, {"text": "$", "label": "O"}, {"text": "1,900", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "CONFORMIDAD", "label": "O"}, {"text": "CON", "label": "O"}, {"text": "EL", "label": "O"}, {"text": 
"DECRETO", "label": "O"}, {"text": "2150", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "1995", "label": "O"}, {"text": "Y", "label": "O"}, {"text": "LA", "label": "O"}, {"text": "AUTORIZACION", "label": "O"}, {"text": "IMPARTIDA", "label": "O"}, {"text": "POR", "label": "O"}, {"text": "LA", "label": "O"}, {"text": "SUPERINTENDENCIA", "label": "O"}, {"text": "DE", "label": "O"}, {"text": "INDUSTRIA", "label": "O"}, {"text": "Y", "label": "O"}, {"text": "COMERCIO", "label": "O"}, {"text": ",", "label": "O"}, {"text": "MEDIANTE", "label": "O"}, {"text": "EL", "label": "O"}, {"text": "OFICIO", "label": "O"}, {"text": "DEL", "label": "O"}, {"text": "18", "label": "FCH"}, {"text": "DE", "label": "FCH"}, {"text": "NO", "label": "FCH"}, {"text": "IEMBRE", "label": "FCH"}, {"text": "DE", "label": "FCH"}, {"text": "1996", "label": "FCH"}, {"text": ",", "label": "O"}, {"text": "LA", "label": "O"}, {"text": "FIRMA", "label": "O"}, {"text": "MECANICA", "label": "O"}, {"text": "QUE", "label": "O"}, {"text": "APARECE", "label": "O"}, {"text": "A", "label": "O"}, {"text": "CONTINUACION", "label": "O"}, {"text": "TIENE", "label": "O"}, {"text": "PLENA", "label": "O"}, {"text": "VALIDEZ", "label": "O"}, {"text": "PARA", "label": "O"}, {"text": "TODOS", "label": "O"}, {"text": "LOS", "label": "O"}, {"text": "EFECTOS", "label": "O"}, {"text": "LEGALES", "label": "O"}, {"text": "\"", "label": "O"}]}], "entities": [{"entity": "FCH", "index": 7, "word": "21", "start": 48, "end": 50}, {"entity": "FCH", "index": 8, "word": "DE", "start": 51, "end": 53}, {"entity": "FCH", "index": 9, "word": "NOVIEMBRE", "start": 54, "end": 63}, {"entity": "FCH", "index": 10, "word": "DE", "start": 64, "end": 66}, {"entity": "FCH", "index": 11, "word": "2011", "start": 67, "end": 71}, {"entity": "PER", "index": 43, "word": "RODRIGUEZ", "start": 268, "end": 277}, {"entity": "PER", "index": 44, "word": "MURCIA", "start": 278, "end": 284}, {"entity": "PER", "index": 45, "word": "JULIO", "start": 285, 
"end": 290}, {"entity": "TDID", "index": 46, "word": "C.C", "start": 291, "end": 294}, {"entity": "DID", "index": 47, "word": "194459", "start": 295, "end": 301}, {"entity": "TDID", "index": 48, "word": "NIT", "start": 302, "end": 305}, {"entity": "DID", "index": 50, "word": "194459-9", "start": 307, "end": 315}, {"entity": "TFMT", "index": 53, "word": "MATRICULA", "start": 327, "end": 336}, {"entity": "FCH", "index": 57, "word": "23", "start": 353, "end": 355}, {"entity": "FCH", "index": 58, "word": "DE", "start": 356, "end": 358}, {"entity": "FCH", "index": 59, "word": "ABRIL", "start": 359, "end": 364}, {"entity": "FCH", "index": 60, "word": "DE", "start": 365, "end": 367}, {"entity": "FCH", "index": 61, "word": "1997", "start": 368, "end": 372}, {"entity": "FCH", "index": 153, "word": "15", "start": 964, "end": 966}, {"entity": "FCH", "index": 154, "word": "DE", "start": 967, "end": 969}, {"entity": "FCH", "index": 155, "word": "JUNIO", "start": 970, "end": 975}, {"entity": "FCH", "index": 156, "word": "DE", "start": 976, "end": 978}, {"entity": "FCH", "index": 157, "word": "2010", "start": 979, "end": 983}, {"entity": "ACT", "index": 170, "word": "ACTIVIDAD", "start": 1073, "end": 1082}, {"entity": "ACT", "index": 171, "word": "ECONOMICA", "start": 1083, "end": 1092}, {"entity": "ACT", "index": 172, "word": "FABRICACION", "start": 1093, "end": 1104}, {"entity": "ACT", "index": 173, "word": "ELEMENTOS", "start": 1105, "end": 1114}, {"entity": "ACT", "index": 174, "word": "METALICOS", "start": 1115, "end": 1124}, {"entity": "ACT", "index": 175, "word": ",", "start": 1124, "end": 1125}, {"entity": "ACT", "index": 176, "word": "ORNAMENTACION", "start": 1126, "end": 1139}, {"entity": "ACT", "index": 177, "word": "PUERTAS", "start": 1140, "end": 1147}, {"entity": "ACT", "index": 178, "word": ",", "start": 1147, "end": 1148}, {"entity": "ACT", "index": 179, "word": "VENTANAS", "start": 1149, "end": 1157}, {"entity": "ACT", "index": 180, "word": ",", "start": 1157, 
"end": 1158}, {"entity": "ACT", "index": 181, "word": "REJAS", "start": 1159, "end": 1164}, {"entity": "ACT", "index": 182, "word": "Y", "start": 1165, "end": 1166}, {"entity": "ACT", "index": 183, "word": "VERJAS", "start": 1167, "end": 1173}, {"entity": "ACT", "index": 184, "word": "/", "start": 1174, "end": 1175}, {"entity": "ACT", "index": 185, "word": "ALQUILER", "start": 1176, "end": 1184}, {"entity": "ACT", "index": 186, "word": "DE", "start": 1185, "end": 1187}, {"entity": "ACT", "index": 187, "word": "VEHICULOS", "start": 1188, "end": 1197}, {"entity": "ACT", "index": 188, "word": "DE", "start": 1198, "end": 1200}, {"entity": "ACT", "index": 189, "word": "CARGA", "start": 1201, "end": 1206}, {"entity": "ACT", "index": 190, "word": "CON", "start": 1207, "end": 1210}, {"entity": "ACT", "index": 191, "word": "CONDUCTOR", "start": 1211, "end": 1220}, {"entity": "TNRS", "index": 200, "word": "ESTABLECIMIENTOS", "start": 1276, "end": 1292}, {"entity": "TNRS", "index": 201, "word": "DE", "start": 1293, "end": 1295}, {"entity": "TNRS", "index": 203, "word": "NOMBRE", "start": 1305, "end": 1311}, {"entity": "ORG", "index": 204, "word": "CARPINTERIA", "start": 1312, "end": 1323}, {"entity": "ORG", "index": 205, "word": "METALICA", "start": 1324, "end": 1332}, {"entity": "ORG", "index": 206, "word": "RODRIGUEZ", "start": 1333, "end": 1342}, {"entity": "TFMT", "index": 219, "word": "MATRICULA", "start": 1409, "end": 1418}, {"entity": "FCH", "index": 223, "word": "23", "start": 1434, "end": 1436}, {"entity": "FCH", "index": 224, "word": "DE", "start": 1437, "end": 1439}, {"entity": "FCH", "index": 225, "word": "ABRIL", "start": 1440, "end": 1445}, {"entity": "FCH", "index": 226, "word": "DE", "start": 1446, "end": 1448}, {"entity": "FCH", "index": 227, "word": "1997", "start": 1449, "end": 1453}, {"entity": "FCH", "index": 233, "word": "15", "start": 1484, "end": 1486}, {"entity": "FCH", "index": 234, "word": "DE", "start": 1487, "end": 1489}, {"entity": "FCH", 
"index": 235, "word": "JUNIO", "start": 1490, "end": 1495}, {"entity": "FCH", "index": 236, "word": "DE", "start": 1496, "end": 1498}, {"entity": "FCH", "index": 237, "word": "2010", "start": 1499, "end": 1503}, {"entity": "FCH", "index": 420, "word": "18", "start": 2505, "end": 2507}, {"entity": "FCH", "index": 421, "word": "DE", "start": 2508, "end": 2510}, {"entity": "FCH", "index": 422, "word": "NO", "start": 2511, "end": 2513}, {"entity": "FCH", "index": 423, "word": "IEMBRE", "start": 2514, "end": 2520}, {"entity": "FCH", "index": 424, "word": "DE", "start": 2521, "end": 2523}, {"entity": "FCH", "index": 425, "word": "1996", "start": 2524, "end": 2528}]}
src/scripts/Error_handling.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1 Error loading tagger
2
+ 2 Error loading document
3
+ 3 Format Error in document
4
+ 4 Empty folder for training
5
+ 5 Error loading embeddings in training
6
+ 6 Error making tagger in training
7
+ 7 Error training the model
8
+ 8 Invalid input document in training
9
+ 9 Document does not exist
10
+ 10 Model does not exist
11
+ 11 Error in output JSON
12
+ 12 Error making up the data
13
+ 13 Error defining the model
14
+
15
+
src/scripts/Json_formats.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Tue Dec 6 11:21:55 2022
4
+
5
+ @author: gita
6
+ """
7
+
8
+ import gradio as gr
9
+
10
def image_classifier():
    """Return a hard-coded example of the sentence/mention JSON layout.

    Despite its name, the function takes no input and classifies nothing: it
    builds a fixed dictionary describing the text
    "Frase ejemplo Frase ejemplo" split into two sentences.  Every token
    carries ``begin``/``end`` character offsets into the full text, and
    ``mentions`` marks the token "Frase" of each sentence.

    This layout differs from the token/label layout (``text_labeled``,
    per-token ``label`` and a flat ``entities`` list) that is described in
    the "JSON FORMAT OUTPUT" comment further down in this file.

    Returns:
        dict: a freshly built dictionary with the keys ``text``,
        ``sentences`` and ``mentions`` (in that order).
    """
    sentence_text = "Frase ejemplo"

    # One entry per sentence; token offsets are relative to the whole text.
    first_sentence = {
        'text': sentence_text,
        'id': "s0",
        'tokens': [
            {'text': "Frase", 'begin': 0, 'end': 5},
            {'text': "ejemplo", 'begin': 6, 'end': 13},
        ],
    }
    second_sentence = {
        'text': sentence_text,
        'id': "s1",
        'tokens': [
            {'text': "Frase", 'begin': 14, 'end': 19},
            {'text': "ejemplo", 'begin': 20, 'end': 27},
        ],
    }

    # One mention per sentence, identified as "<sentence id>-m<n>".
    mentions = [
        {'id': "s0-m0", 'type': "Entity_type", 'begin': 0, 'end': 5},
        {'id': "s1-m0", 'type': "Entity_type", 'begin': 14, 'end': 19},
    ]

    return {
        'text': "Frase ejemplo Frase ejemplo",
        'sentences': [first_sentence, second_sentence],
        'mentions': mentions,
    }
109
+
110
# Wrap the example payload in a Gradio interface that takes no input and
# renders the returned dictionary in a JSON component, then start the app.
demo = gr.Interface(
    fn=image_classifier,
    inputs=None,
    outputs=gr.JSON(),
)
demo.launch()
112
+
113
+ #%%
114
+ # JSON FORMAT OUTPUT
115
+
116
+ # Document:{ text:"Texto"
117
+
118
+ # text_labeled: "\"Texto\"/ENTITY"
119
+
120
+ # sentences:[{ text:"Texto"
121
+
122
+ # text_labeled: "\"Texto\"/ENTITY"
123
+
124
+ # tokens: [ {text:"Texto", label : "ENTITY"},
125
+ # {text:"Texto", label : "ENTITY"},
126
+ # {text:"Texto", label : "ENTITY"}
127
+
128
+ # ]
129
+
130
+ # },
131
+
132
+ # { text:"Texto"
133
+
134
+ # text_labeled: "\"Texto\"/ENTITY"
135
+
136
+ # tokens: [ {text:"Texto", label : "ENTITY"},
137
+ # {text:"Texto", label : "ENTITY"},
138
+ # {text:"Texto", label : "ENTITY"}
139
+
140
+ # ]
141
+
142
+ # }
143
+ # ],
144
+ # entities:[
145
+ # {
146
+ # 'entity': "ENTITY",
147
+ # 'index': num,
148
+ # 'word': "Texto",
149
+ # 'start': num,
150
+ # 'end' : num
151
+ # }
152
+ # ]
153
+ # }
154
+
155
+ #%%
156
+
157
+ # JSON FORMAT INPUT
158
+
159
+ # json{...
160
+ # sentences:{
161
+ # s:{
162
+ # text:
163
+ # }
164
+ # }
165
+
166
+ # ...}
src/scripts/Tagged_document.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Tue Oct 25 00:32:02 2022
4
+
5
+ @author: Santiago Moreno
6
+ """
7
+ import os
8
+ import argparse
9
+ from functions import use_model, str2bool, usage_cuda
10
+
11
# Work from this script's own directory so that the relative default output
# path below resolves the same way regardless of where the script is started.
default_path = os.path.dirname(os.path.abspath(__file__))
os.chdir(default_path)
output_dir = "../../data/tagged/document_tagged.json"


if __name__ == '__main__':
    # Command-line interface: model name, input file, optional output file
    # and an optional CUDA switch.
    parser = argparse.ArgumentParser(
        add_help=True,
        usage='Tag a document with a pre-trained model (GPU optional)',
    )
    parser.add_argument(
        '-m', '--model',
        default='CCC', type=str, nargs='?',
        help='New model name', required=True,
    )
    parser.add_argument(
        '-id', '--input_data',
        type=str, nargs='?',
        help='Absolute path input file', required=True,
    )
    parser.add_argument(
        '-od', '--output_data',
        const=output_dir, default=output_dir, type=str, nargs='?',
        help='Absolute path output file', required=False,
    )
    parser.add_argument(
        '-cu', '--cuda',
        type=str2bool, nargs='?', const=True, default=False,
        help='Boolean value for using cuda to Train the model (True). By defaul False.',
        choices=(True, False), required=False,
    )
    args = parser.parse_args()

    # Select the device first and report which one will be used.
    cuda_info = usage_cuda(True) if args.cuda else usage_cuda(False)
    print(cuda_info)

    # use_model signals failure by returning an integer error code.
    Error = use_model(args.model, args.input_data, args.output_data)
    if type(Error) is not int:
        print('Tagged complete')
    else:
        print('Tagged not complete, error code {}'.format(Error))
34
+
35
+ # path_data = "C:/Users/gita/OneDrive - Universidad de Antioquia/GITA/Maestría/Programas/Datasets/camara_comercio_NER/gt/3cb4fa20-89cb-11e8-a485-d149999fe64b-0.json "
36
+ # output_dir = "C:/Users/gita/OneDrive - Universidad de Antioquia/GITA/Maestría/Programas/Software NER/document_tagged.json"
37
+ # sentence = use_model('CCC', path_data, output_dir)
src/scripts/Test.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Thu May 4 18:47:46 2023
4
+
5
+ @author: sanmo
6
+ """
7
+ import os
8
# Directory of this script, normalised to forward slashes so the
# string-concatenated paths below look the same on Windows and POSIX.
default_path = os.path.dirname(os.path.abspath(__file__)).replace('\\', '/')

from functionsrc import training_model_rc, usage_cuda_rc, use_model_rc

# Input files for the relation-classification run, relative to this script.
path_data = '{}/../../data/RC/test.txt'.format(default_path)
rel2id_data = '{}/../../data/RC/rel2id.json'.format(default_path)

print(usage_cuda_rc(True))

# NOTE(review): 'p' and 2 are presumably the model name and the number of
# epochs -- confirm against training_model_rc in functionsrc.
training_model_rc('p', path_data, rel2id_data, 2)
17
+
18
+ # output_dir = default_path + '/../../out_RC.json'
19
+
20
+ # print(use_model_rc('new', path_data, output_dir))
src/scripts/Train_model.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Sat Oct 29 14:56:09 2022
4
+
5
+ @author: Santiago Moreno
6
+ """
7
+ import os
8
+ import argparse
9
+ from functions import json_to_txt, training_model, characterize_data, upsampling_data, str2bool, usage_cuda,copy_data
10
# Work from this script's own directory (the script changes the process
# working directory on import/run).
default_path = os.path.dirname(os.path.abspath(__file__))
os.chdir(default_path)


if __name__ == '__main__':
    # Command-line interface for training a new NER model.
    parser = argparse.ArgumentParser(add_help=True, usage='Train a new model with given data (GPU optional)')
    parser.add_argument('-f','--fast', type=str2bool, nargs='?',const=True, default=False, help='Training fast option (Only for functioning test)', choices=(True, False), required=False)
    parser.add_argument('-m','--model', type=str, nargs='?', help='New model name', required=True)
    parser.add_argument('-s','--standard', type=str2bool, nargs='?',const=True, default=False, help='Standard CONLL input or not', choices=(True, False), required=False)
    parser.add_argument('-id','--input_dir', type=str, nargs='?', help='Absolute path input directory', required=True)
    parser.add_argument('-u','--up_sample_flag', type=str2bool, nargs='?',const=True, default=False , help='Boolean value to upsampling the data = True or not upsampling = False', required=False, choices=(True, False))
    parser.add_argument('-cu','--cuda', type=str2bool, nargs='?', const=True, default=False, help='Boolean value for using cuda to Train the model (True). By defaul False.', choices=(True, False), required=False)

    args = parser.parse_args()

    # The "fast" option trains for a single epoch (functional smoke test).
    epochs = 1 if args.fast else 20

    # Step 1: bring the input data into the training folder, either by
    # copying it as-is (standard input) or by converting it with json_to_txt,
    # which reports failure through an integer error code.
    if args.standard:
        copy_data(args.input_dir)
        not_error = True
    else:
        Error = json_to_txt(args.input_dir)
        if type(Error) == int:
            print('Error processing the input documents, code error {}'.format(Error))
            not_error = False
        else:
            not_error = True

    # Step 2 (optional): upsample the entities whose distribution value is
    # below 200.
    if not_error and args.up_sample_flag:
        entities_dict = characterize_data()
        if type(entities_dict) == int:
            # characterize_data (as defined in functionsner.py) returns an
            # integer error code instead of the label distribution when the
            # corpus cannot be loaded; calling .keys() on it would crash.
            print('Error characterizing the training data, code error {}'.format(entities_dict))
            not_error = False
        else:
            entities = list(entities_dict.keys())
            entities_to_upsample = [entity for entity, count in entities_dict.items() if count < 200]
            upsampling_data(entities_to_upsample, 0.8, entities)

    # Step 3: select the device and train.
    if not_error:
        cuda_info = usage_cuda(True) if args.cuda else usage_cuda(False)
        print(cuda_info)

        Error = training_model(args.model, epochs)
        if type(Error) == int:
            print('Error training the model, code error {}'.format(Error))
        else:
            print('Training complete')
src/scripts/__pycache__/functionsner.cpython-311.pyc ADDED
Binary file (19.5 kB). View file
 
src/scripts/__pycache__/functionsner.cpython-39.pyc ADDED
Binary file (9.48 kB). View file
 
src/scripts/__pycache__/functionsrc.cpython-311.pyc ADDED
Binary file (37.8 kB). View file
 
src/scripts/__pycache__/functionsrc.cpython-39.pyc ADDED
Binary file (16.2 kB). View file
 
src/scripts/__pycache__/upsampling.cpython-311.pyc ADDED
Binary file (21.5 kB). View file
 
src/scripts/__pycache__/upsampling.cpython-39.pyc ADDED
Binary file (13 kB). View file
 
src/scripts/functionsner.py ADDED
@@ -0,0 +1,467 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Tue Oct 11 16:46:45 2022
4
+
5
+ @author: Santiago Moreno
6
+ """
7
+ from upsampling import upsampling_ner
8
+ from flair.datasets import ColumnCorpus
9
+ from flair.data import Corpus
10
+ from flair.trainers import ModelTrainer
11
+ from flair.models import SequenceTagger
12
+ from flair.embeddings import TransformerWordEmbeddings
13
+ from torch.optim.lr_scheduler import OneCycleLR
14
+ from flair.data import Sentence
15
+ from sklearn.model_selection import StratifiedGroupKFold
16
+ from distutils.dir_util import copy_tree
17
+ import numpy as np
18
+ import torch
19
+ import pandas as pd
20
+ import json
21
+ import os
22
+ import operator
23
+ import flair
24
+ import argparse
25
+
26
+ default_path = os.path.dirname(os.path.abspath(__file__))
27
+ tagger_document = 0
28
+ tagger_sentence = 0
29
def check_create(path):
    """Create directory *path*, including missing parents, if it is absent.

    Calling it for a directory that already exists is a no-op.  Using
    ``exist_ok=True`` avoids the check-then-create race of testing
    ``os.path.isdir`` first: if another process creates the directory in
    between, no ``FileExistsError`` is raised.

    Args:
        path: directory path to ensure.

    Raises:
        OSError: if the directory cannot be created (for example, *path*
            exists but is not a directory).
    """
    import os

    os.makedirs(path, exist_ok=True)
34
+
35
def str2bool(v):
    """Convert a command-line token to a boolean (for ``argparse`` ``type=``).

    Booleans are returned unchanged.  Strings are compared
    case-insensitively, ignoring surrounding whitespace, against the
    accepted spellings below.

    Args:
        v: a ``bool`` or a string such as ``'yes'``, ``'True'``, ``'0'``.

    Returns:
        bool: the parsed value.

    Raises:
        argparse.ArgumentTypeError: if *v* is not a bool and is not one of
            the accepted spellings (this includes non-string input such as
            ``None``).
    """
    if isinstance(v, bool):
        return v
    if isinstance(v, str):
        # Compare on the lowercased token, so only lowercase spellings
        # need to be listed.
        token = v.strip().lower()
        if token in ('yes', 'true', 't', 'y', '1'):
            return True
        if token in ('no', 'false', 'f', 'n', '0'):
            return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
44
+
45
+
46
def copy_data(original_path):
    """Copy the directory tree at *original_path* into the training folder.

    The destination is ``data/train`` two levels above this module's
    directory (built from the module-level ``default_path``).

    Args:
        original_path: source directory to copy from.
    """
    train_dir = '{}/../../data/train'.format(default_path)
    copy_tree(original_path, train_dir)
49
+
50
def characterize_data():
    """Load the training corpus and return its label distribution.

    The corpus is read from ``data/train`` (two levels above this module's
    directory) in two-column format: column 0 is the token text, column 1
    the NER tag.  ``train.txt`` and ``test.txt`` are loaded; no dev file is
    given.

    Returns:
        The result of ``corpus.get_label_distribution()`` on success
        (presumably a mapping from label to count -- verify against the
        flair version in use), or the integer error code ``8`` if the
        corpus cannot be loaded.
    """
    data_folder = default_path + '/../../data/train'
    columns = {0: 'text', 1: 'ner'}

    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt / SystemExit are not reported as a bad input file.
    try:
        corpus: Corpus = ColumnCorpus(data_folder, columns,
                                      train_file='train.txt',
                                      test_file='test.txt')
    except Exception:
        print('Invalid input document in training')
        return 8

    return corpus.get_label_distribution()
72
+
73
+
74
def upsampling_data(entities_to_upsample, probability, entities):
    """Augment ``train.txt`` in place, one upsampling method at a time.

    For each of the methods "SiS", "LwTR", "MR", "SR" and "MBT" the current
    training file is loaded through ``upsampling_ner``, new samples are
    generated for *entities_to_upsample*, appended to the loaded data, and
    the file is rewritten.  Each method therefore sees the samples added by
    the previous ones.

    The file is written as one ``word label`` pair per line, with a blank
    line between sentences and none after the last sentence.

    Args:
        entities_to_upsample: entity labels to generate new samples for.
        probability: value passed through to ``upsampler.upsampling``.
        entities: list of all entity labels; ``'O'`` is appended before it
            is handed to ``upsampling_ner``.
    """
    rule = '-' * 20
    print(rule, 'upsampling', rule)

    train_file = default_path + '/../../data/train' + '/train.txt'
    columns = {'text': 0, 'ner': 1}

    for method in ("SiS", "LwTR", "MR", "SR", "MBT"):
        # Re-read the file so the samples written by earlier methods are included.
        upsampler = upsampling_ner(train_file, entities + ['O'], columns)
        data, data_labels = upsampler.get_dataset()
        new_samples, new_labels = upsampler.upsampling(entities_to_upsample, probability, [method])
        data += new_samples
        data_labels += new_labels

        last_index = len(data) - 1
        with open(train_file, mode='w', encoding='utf-8') as f:
            for l, sentence in enumerate(data):
                labels = data_labels[l]
                f.writelines(
                    word + ' ' + labels[j] + '\n'
                    for j, word in enumerate(sentence)
                )
                # Blank separator line between sentences, not after the last.
                if l < last_index:
                    f.write('\n')

    print(rule, 'upsampling complete', rule)
95
+
96
+
97
def usage_cuda(cuda):
    """Select the device flair runs on and describe the choice.

    Args:
        cuda: Truthy to request the GPU ('cuda:0'); falsy to force the CPU.

    Returns:
        A human-readable message stating which device will be used.
    """
    cpu = torch.device('cpu')
    if not cuda:
        flair.device = cpu
        return 'CPU will be used'

    gpu = torch.device('cuda:0')
    flair.device = gpu if torch.cuda.is_available() else cpu
    if flair.device == cpu:
        # GPU was requested but is not available.
        return 'Error handling GPU, CPU will be used'
    elif flair.device == gpu:
        return 'GPU detected, GPU will be used'
105
+
106
+
107
def training_model(name, epochs=20):
    """Fine-tune an XLM-RoBERTa sequence tagger on the corpus in ``data/train``.

    Args:
        name: Model name; output is written to ``<script dir>/../../models/<name>``.
        epochs: Maximum number of training epochs.

    Returns:
        None on success, otherwise an integer error code:
        8 (corpus could not be loaded), 5 (embeddings could not be loaded),
        6 (tagger could not be built) or 7 (training failed).
    """
    data_folder = default_path + '/../../data/train'
    path_model = default_path + '/../../models/{}'.format(name)
    if os.path.isdir(path_model):
        print('WARNING, model already exists will be overwritten')
    columns = {0: 'text', 1: 'ner'}

    # 1. Build the corpus from the column-formatted train and test files.
    # All handlers below catch Exception instead of using a bare except, so
    # Ctrl-C / SystemExit are no longer swallowed and reported as failures.
    try:
        corpus = ColumnCorpus(data_folder, columns,
                              train_file='train.txt',
                              test_file='test.txt')
    except Exception:
        print('Invalid input document in training')
        return 8

    # 2. Tag type to predict.
    tag_type = 'ner'

    # 3. Make the tag dictionary from the corpus.
    tag_dictionary = corpus.make_label_dictionary(label_type=tag_type)

    # 4. Transformer embeddings, fine-tuned together with the tagger.
    try:
        embeddings = TransformerWordEmbeddings(
            model='xlm-roberta-large',
            layers="-1",
            subtoken_pooling="first",
            fine_tune=True,
            use_context=True,
        )
    except Exception:
        print('Error while loading embeddings from RoBERTa')
        return 5

    # 5. Bare-bones sequence tagger (no CRF, no RNN, no reprojection).
    try:
        tagger_train = SequenceTagger(
            hidden_size=256,
            embeddings=embeddings,
            tag_dictionary=tag_dictionary,
            tag_type='ner',
            use_crf=False,
            use_rnn=False,
            reproject_embeddings=False,
        )
    except Exception:
        print('Error making tagger')
        return 6

    # 6. Trainer; the AdamW optimizer is passed to train() below.
    trainer = ModelTrainer(tagger_train, corpus)

    # 7. Run training with a small learning rate and one-cycle schedule.
    try:
        trainer.train(path_model,
                      learning_rate=5.0e-6,
                      mini_batch_size=1,
                      mini_batch_chunk_size=1,
                      max_epochs=epochs,
                      scheduler=OneCycleLR,
                      embeddings_storage_mode='cpu',
                      optimizer=torch.optim.AdamW,
                      )
    except Exception:
        print('Error training the model, try setting CUDA False')
        return 7

    print("Model {} trained and saved in {}".format(name,'models/{}'.format(name)))
186
+
187
+
188
def tag_sentence(sentence, name):
    """Tag a single sentence with the NER model called ``name``.

    Args:
        sentence: Text to tag.
        name: Model name; loaded from ``<script dir>/../../models/<name>``
            (``best-model.pt``, falling back to ``final-model.pt``).

    Returns:
        A dict with keys 'Sentence_tagged' (tokens joined by spaces, entity
        tokens replaced by their label short string) and 'Highligth'
        ({'text': ..., 'entities': [...]}), or the error code 1 when the
        model cannot be loaded.
    """
    results={'Sentence_tagged':'', 'Highligth':{}}
    Highligth_dict={"text": "", "entities": []}

    #--------------Load the trained model-------------------------
    path_model = default_path + '/../../models/{}'.format(name)
    global tagger_sentence

    # The cached tagger used to be reused even when a different model name
    # was requested; remember which model is loaded and reload on change.
    loaded_name = getattr(tag_sentence, '_loaded_name', name)
    if (not tagger_sentence) or loaded_name != name:
        try:
            loaded = SequenceTagger.load(path_model+'/best-model.pt')
        except Exception:
            try:
                loaded = SequenceTagger.load(path_model+'/final-model.pt')
            except Exception:
                print('Invalid model')
                return 1
        tagger_sentence = loaded
        tag_sentence._loaded_name = name

    #------------------Tagged sentence---------------------
    print('-'*20,'Tagging','-'*20)
    sentence_f = Sentence(sentence)
    tagger_sentence.predict(sentence_f)
    sentence_tokenized = []
    Highligth_dict['text'] = sentence_f.to_plain_string()

    for indx, token in enumerate(sentence_f.tokens):
        t = token.get_label()
        if t.value == 'O':
            sentence_tokenized.append(token.text)
        else:
            sentence_tokenized.append(t.shortstring)
            Highligth_dict["entities"].append({
                'entity': t.value,
                'index' : indx,
                'word' : token.text,
                'start': token.start_position,
                'end' : token.end_position,
            })

    results['Highligth'] = Highligth_dict
    results['Sentence_tagged'] = ' '.join(sentence_tokenized)
    print('-'*20,'Tagged complete','-'*20)
    return results
237
+
238
+
239
def use_model(name, path_data, output_dir):
    """Tag every sentence of a JSON document and save the result as JSON.

    Args:
        name: Model name; loaded from ``<script dir>/../../models/<name>``
            (``best-model.pt``, falling back to ``final-model.pt``).
        path_data: Path of the input JSON document; it must provide a
            'sentences' list whose items have a 'text' entry.
        output_dir: Path of the JSON file the results are written to.

    Returns:
        The results dict ('text', 'text_labeled', 'sentences', 'entities')
        on success, otherwise an integer error code: 10 (model folder
        missing), 9 (input is not a file), 1 (model cannot be loaded),
        2 (input cannot be read or is empty), 3 (unexpected JSON layout)
        or 11 (output cannot be written).
    """
    #--------------Load the trained model-------------------------
    path_model = default_path + '/../../models/{}'.format(name)

    if not (os.path.isdir(path_model)):
        print('Model does not exists')
        return 10

    if not os.path.isfile(path_data):
        print('Input file is not a file')
        return 9

    global tagger_document

    # The cached tagger used to be reused even when a different model name
    # was requested; remember which model is loaded and reload on change.
    loaded_name = getattr(use_model, '_loaded_name', name)
    if (not tagger_document) or loaded_name != name:
        try:
            loaded = SequenceTagger.load(path_model+'/best-model.pt')
        except Exception:
            try:
                loaded = SequenceTagger.load(path_model+'/final-model.pt')
            except Exception:
                print('Invalid model')
                return 1
        tagger_document = loaded
        use_model._loaded_name = name

    #-----------------Load the document-------------------------
    try:
        data = pd.read_json(path_data, orient ='index', encoding='utf-8')[0]
    except Exception:
        print('Can\'t open the input file')
        return 2

    if len(data) <= 0:
        print(f"length of document greater than 0 expected, got: {len(data)}")
        return 2

    try:
        sentences=data['sentences']
        sentences[0]['text']  # probe the expected layout
    except Exception:
        print('Invalid JSON format in document {}'.format(path_data))
        return 3
    print('-'*20,'Tagging','-'*20)

    #-----------------Tagged the document-------------------------
    results = {'text':"", 'text_labeled':"",'sentences':[], 'entities': []}
    indx_prev = 0  # tokens seen in previous sentences
    pos_prev = 0   # characters seen in previous sentences
    for s in sentences:
        sentence = Sentence(s['text'])
        tagger_document.predict(sentence, mini_batch_size = 1)
        plain_text = sentence.to_plain_string()
        sen_dict_temp = {'text':plain_text, 'text_labeled':'', 'tokens':[]}
        sentence_tokenized = []
        for indx, token in enumerate(sentence.tokens):
            t = token.get_label('ner')
            sen_dict_temp['tokens'].append({'text':token.text, 'label':t.value})

            if t.value == 'O':
                sentence_tokenized.append(token.text)
            else:
                sentence_tokenized.append(t.shortstring)
                # Offsets are shifted so they refer to the whole document.
                results["entities"].append({
                    'entity': t.value,
                    'index' : indx + indx_prev,
                    'word' : token.text,
                    'start': token.start_position + pos_prev,
                    'end' : token.end_position + pos_prev,
                })
        indx_prev += len(sentence.tokens)
        pos_prev += len(plain_text)
        sen_tagged = ' '.join(sentence_tokenized)
        sen_dict_temp['text_labeled'] = sen_tagged
        results['sentences'].append(sen_dict_temp)
        results['text'] += plain_text
        results['text_labeled'] += sen_tagged

    #-----------------Save the results-------------------------
    try:
        with open(output_dir, "w", encoding='utf-8') as write_file:
            json.dump(results, write_file)

        print('-'*20,'Tagged complete','-'*20)
        print('Document tagged saved in {}'.format(output_dir))
    except Exception:
        print('Error in output file')
        return 11

    return results
335
+
336
def json_to_txt(path_data_documents):
    """Convert annotated JSON documents into CoNLL-style train/test files.

    Every file in ``path_data_documents`` is read, each token is labelled
    with the type of the first mention that covers its start offset in the
    same sentence ('O' otherwise), and the tokens are split into
    ``train.txt`` and ``test.txt`` under ``<script dir>/../../data/train``
    using the first fold of a 10-fold StratifiedGroupKFold (grouped by
    sentence).

    Args:
        path_data_documents: Folder containing the JSON documents.

    Returns:
        None on success, 4 when the folder is empty, 3 when a document
        cannot be read or lacks the expected fields.
    """
    #-------------List the documents in the path------------
    documents=os.listdir(path_data_documents)
    if len(documents) <= 0:
        print('There are not documents in the folder')
        return 4

    data_from_documents={'id':[],'document':[],'sentence':[],'word':[],'tag':[]}

    for num, doc in enumerate(documents):
        #--------------Verify each document-------------
        # Reading the file is part of the validation, so an unreadable
        # document yields error code 3 instead of an uncaught exception.
        try:
            df = pd.read_json(path_data_documents+'/'+doc, orient ='index')[0]
            sentences = df['sentences']
            first_sentence = sentences[0]
            for key in ('text', 'id', 'tokens'):
                first_sentence[key]
            first_token = first_sentence['tokens'][0]
            for key in ('text', 'begin', 'end'):
                first_token[key]
            tags = df['mentions']
            if tags:
                for key in ('id', 'begin', 'end', 'type'):
                    tags[0][key]
        except Exception:
            print('Invalid JSON input format in document {}'.format(doc))
            return 3

        #-----------------Organize the data----------------
        # The original handled one-character tokens in a separate branch
        # whose body was identical to the general case; they are merged.
        for s in sentences:
            id_senten=s['id']
            for tk in s['tokens']:
                tag = 'O'
                for tg in tags:
                    if id_senten == tg['id'].split('-')[0] and tk['begin']>=tg['begin'] and tk['begin']<tg['end']:
                        tag = tg['type']
                        break
                data_from_documents['id'].append('d'+str(num)+'_'+id_senten)
                data_from_documents['document'].append(doc)
                data_from_documents['word'].append(tk['text'])
                data_from_documents['sentence'].append(s['text'])
                data_from_documents['tag'].append(tag)

    X=np.array(data_from_documents['word'])
    y=np.array(data_from_documents['tag'])
    groups=np.array(data_from_documents['id'])

    #-------------------Save the data in CONLL format--------------
    # Only the first fold is used: nine tenths for train, one tenth for test.
    group_kfold = StratifiedGroupKFold(n_splits=10, shuffle=True, random_state=42)
    train_index, test_index = next(group_kfold.split(X, y, groups))

    data_folder = default_path + '/../../data/train'
    for arch, index in (('train', train_index), ('test', test_index)):
        X_temp, y_temp, groups_temp = X[index], y[index], groups[index]
        id_in=groups_temp[0]

        check_create(data_folder)
        count = 0
        with open(data_folder + '/{}.txt'.format(arch), mode='w', encoding='utf-8') as f:
            for word, tag, group in zip(X_temp, y_temp, groups_temp):
                # A blank line separates sentences (groups).
                if group != id_in:
                    id_in=group
                    f.write('\n')
                    count = 0

                count += 1
                f.write(word+' '+ tag + '\n')

                # Break up very long sentences after 150 tokens.
                if count >= 150:
                    count = 0
                    f.write('\n')
447
+
448
+
449
+
450
+ # print("Before check")
451
+ # checkpoint = "xlm-roberta-large"
452
+ # config = AutoConfig.from_pretrained(checkpoint)
453
+
454
+ # with init_empty_weights():
455
+ # model = AutoModelForSequenceClassification.from_config(config)
456
+
457
+ # print("After check")
458
+ # try:
459
+ # tagger = load_checkpoint_and_dispatch(model, path_model+'/best-model.pt', device_map="auto")
460
+ # except:
461
+ # try:
462
+ # tagger = load_checkpoint_and_dispatch(model, path_model+'/final-model.pt', device_map="auto")
463
+ # except:
464
+ # print('Invalid model')
465
+ # return 1
466
+
467
+
src/scripts/functionsrc.py ADDED
@@ -0,0 +1,718 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Mon May 1 20:54:14 2023
4
+
5
+ @author: sanmo
6
+ """
7
+ import numpy as np
8
+ import torch
9
+ import torch.nn as nn
10
+ from torch.utils.data import Dataset, DataLoader, random_split
11
+ import pandas as pd
12
+ import json
13
+ import os
14
+ import gc
15
+ from distutils.dir_util import copy_tree
16
+ import shutil
17
+ import argparse
18
+ import flair
19
+ from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, TransformerWordEmbeddings
20
+ from torch import nn, tanh, sigmoid, relu, FloatTensor, rand, stack, optim, cuda, softmax, save, device, tensor, int64, no_grad, concat
21
+ from flair.data import Sentence
22
+
23
+
24
# Module-level state shared by the functions below through ``global``
# statements; 0 marks a slot that has not been initialised yet.
default_path = 0
entities = 0
tagger_document = 0
embeddings = 0
json_data = 0
train_loader = 0
val_loader = 0
test_loader = 0
cnn = 0
optimizer = 0
criterion = 0
# NOTE: this rebinds the name ``device`` imported from torch above.
device = 0
test_sentences = 0
# Lowest validation loss seen so far.
best_valid_loss = np.inf
38
+
39
def check_create(path):
    """Create directory ``path`` (and any missing parents) if it does not exist."""
    import os

    # exist_ok=True removes the race between checking for the directory and
    # creating it (another process may create it in between).
    os.makedirs(path, exist_ok=True)
44
+
45
def str2bool(v):
    """Parse a command-line style truth value.

    Args:
        v: A bool (returned unchanged) or a string such as 'yes'/'no',
            'true'/'false', 't'/'f', 'y'/'n', '1'/'0' (case-insensitive).

    Returns:
        The corresponding bool.

    Raises:
        argparse.ArgumentTypeError: If ``v`` is not a recognised spelling.
    """
    if isinstance(v, bool):
        return v
    # The value is lower-cased before the lookup, so only lower-case
    # spellings are listed ('True'/'False' could never match).
    normalized = v.lower()
    if normalized in ('yes', 'true', 't', 'y', '1'):
        return True
    if normalized in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
54
+
55
+
56
+
57
class MyDataset(Dataset):
    """Relation-classification samples built from the module-level ``json_data``.

    Each sentence's flattened embeddings are reshaped to rows of 1024 values
    and split into five parts using the head (``h_pos``) and tail
    (``t_pos``) token positions:

    * ``c1``: tokens before the head, padded/truncated to ``len_c1`` rows
    * ``h1``: mean embedding of the head span
    * ``c2``: tokens between head and tail, padded/truncated to ``len_c2``
    * ``h2``: mean embedding of the tail span
    * ``c3``: tokens after the tail, padded/truncated to ``len_c3``

    ``json_data`` is deleted from the module namespace once consumed.
    """

    def __init__(self, len_c1=7, len_c2=5, len_c3=11):
        global json_data

        def fix_tensor(tensor, size):
            """Zero-pad or truncate dimension 2 of ``tensor`` to ``size`` rows."""
            missing = size - tensor.shape[2]
            if missing > 0:
                # One concatenation instead of appending a row at a time.
                tensor = torch.cat([tensor, torch.zeros(1, 1, missing, 1024)], dim=2)
            return tensor[:, :, :size, :]

        flat = torch.Tensor(json_data['flat_emb'])
        data = flat.reshape((flat.shape[0], 1, -1, 1024))
        self.targets = torch.Tensor(json_data['relation'])

        # Collect per-sentence pieces in lists and concatenate once at the
        # end; concatenating inside the loop copied everything accumulated
        # so far on every iteration (quadratic in the number of sentences).
        c1_parts, h1_parts, c2_parts, h2_parts, c3_parts = [], [], [], [], []
        for n_sen in range(flat.shape[0]):
            h_pos = json_data['h_pos'][n_sen]
            t_pos = json_data['t_pos'][n_sen]
            tokens = data[n_sen, 0]

            before_head = tokens[:h_pos[0], :].reshape((1, 1, -1, 1024))
            c1_parts.append(fix_tensor(before_head, len_c1))

            h1_parts.append(tokens[h_pos[0]:h_pos[-1]+1, :].mean(dim=0).reshape((1, 1024)))

            between = tokens[h_pos[-1]+1:t_pos[0], :].reshape((1, 1, -1, 1024))
            c2_parts.append(fix_tensor(between, len_c2))

            h2_parts.append(tokens[t_pos[0]:t_pos[-1]+1, :].mean(dim=0).reshape((1, 1024)))

            after_tail = tokens[t_pos[-1]+1:, :].reshape((1, 1, -1, 1024))
            c3_parts.append(fix_tensor(after_tail, len_c3))

        self.c1 = torch.cat(c1_parts, dim=0)
        self.h1 = torch.cat(h1_parts, dim=0)
        self.c2 = torch.cat(c2_parts, dim=0)
        self.h2 = torch.cat(h2_parts, dim=0)
        self.c3 = torch.cat(c3_parts, dim=0)

        # Release the large intermediates, including the module-level
        # json_data, before training starts.
        del data
        del flat
        del c1_parts, h1_parts, c2_parts, h2_parts, c3_parts
        del json_data
        gc.collect()
        self.targets = self.targets.to(torch.int64)

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, index):
        c1x = self.c1[index]
        h1x = self.h1[index]
        c2x = self.c2[index]
        h2x = self.h2[index]
        c3x = self.c3[index]
        y = self.targets[index]
        return c1x, h1x, c2x, h2x, c3x, y
123
+
124
+
125
def update_step(c1, h1, c2, h2, c3, label):
    """Run one optimisation step of the module-level ``cnn`` on a batch.

    Args:
        c1, h1, c2, h2, c3: Batch inputs forwarded to ``cnn``.
        label: Target class indices for the batch.

    Returns:
        Tuple ``(loss, correct)``: the batch loss as a float and the number
        of correctly classified samples (as a float).
    """
    prediction = cnn(c1, h1, c2, h2, c3)
    optimizer.zero_grad()
    loss = criterion(prediction, label)
    loss.backward()
    optimizer.step()
    # argmax over the logits selects the same class as argmax over their
    # softmax, so no Softmax module needs to be built for every batch.
    acc = (prediction.detach().argmax(dim=1) == label).type(torch.float).sum().item()
    return loss.item(), acc
137
+
138
def evaluate_step(c1, h1, c2, h2, c3, label):
    """Evaluate the module-level ``cnn`` on a batch without updating it.

    The model is switched to eval mode (dropout disabled) and gradients are
    not recorded; the previous train/eval mode is restored afterwards.

    Args:
        c1, h1, c2, h2, c3: Batch inputs forwarded to ``cnn``.
        label: Target class indices for the batch.

    Returns:
        Tuple ``(loss, correct)``: the batch loss as a float and the number
        of correctly classified samples (as a float).
    """
    was_training = cnn.training
    cnn.eval()
    try:
        with torch.no_grad():
            prediction = cnn(c1, h1, c2, h2, c3)
            loss = criterion(prediction, label)
            acc = (prediction.argmax(dim=1) == label).type(torch.float).sum().item()
    finally:
        # Leave the model in whatever mode the caller had it in.
        cnn.train(was_training)
    return loss.item(), acc
146
+
147
def train_one_epoch(epoch, name, rel2id_file):
    """Train for one epoch, validate, and checkpoint the best model.

    A checkpoint is written only on epochs divisible by 10, and only when
    the summed validation loss is lower than the best value seen so far.

    Args:
        epoch: Zero-based epoch number.
        name: Model name, used in the checkpoint path.
        rel2id_file: Not used by this function.

    Returns:
        Tuple ``(train_loss, valid_loss, train_acc, valid_acc)``, each
        divided by the size of the corresponding dataset.
    """
    global best_valid_loss
    if device == torch.device('cuda:0'):
        cnn.cuda()

    train_loss, valid_loss, acc_train, acc_valid = 0.0, 0.0, 0.0, 0.0
    for c1, h1, c2, h2, c3, targets in train_loader:
        train_loss_temp, acc_train_temp = update_step(
            c1.to(device), h1.to(device), c2.to(device),
            h2.to(device), c3.to(device), targets.to(device))
        train_loss += train_loss_temp
        acc_train += acc_train_temp
    for c1, h1, c2, h2, c3, targets in val_loader:
        valid_loss_temp, acc_valid_temp = evaluate_step(
            c1.to(device), h1.to(device), c2.to(device),
            h2.to(device), c3.to(device), targets.to(device))
        valid_loss += valid_loss_temp
        acc_valid += acc_valid_temp

    # Save the model if it is the best so far.
    if epoch % 10 == 0 and valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        # NOTE(review): the path is relative to the current working
        # directory, not to default_path - confirm this matches where the
        # checkpoint is read back.
        torch.save({'epoca': epoch,
                    'model_state_dict': cnn.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': valid_loss},
                   '../../models/RC/{}/best_model.pt'.format(name))

    return (train_loss/len(train_loader.dataset),
            valid_loss/len(val_loader.dataset),
            acc_train/len(train_loader.dataset),
            acc_valid/len(val_loader.dataset))
185
+
186
+
187
def FocalLoss(input, target, gamma=0, alpha=None, size_average=True):
    """Compute the focal loss ``-(1 - p_t) ** gamma * log(p_t)``.

    Args:
        input: Logits of shape (N, C), or (N, C, ...) which is flattened to
            (N * ..., C).
        target: Class indices; flattened to one index per row of ``input``.
        gamma: Focusing exponent; 0 gives plain cross-entropy.
        alpha: Optional 1-D tensor of per-class weights.
        size_average: Return the mean when True, the sum otherwise.

    Returns:
        A scalar tensor with the loss.
    """
    if input.dim() > 2:
        input = input.view(input.size(0), input.size(1), -1)  # N,C,H,W => N,C,H*W
        input = input.transpose(1, 2)                          # N,C,H*W => N,H*W,C
        input = input.contiguous().view(-1, input.size(2))     # N,H*W,C => N*H*W,C
    target = target.view(-1, 1)

    # dim=1 is given explicitly; relying on the implicit dimension is
    # deprecated and emits a warning.
    logpt = nn.functional.log_softmax(input, dim=1)
    logpt = logpt.gather(1, target)
    logpt = logpt.view(-1)
    # The modulating factor is treated as a constant: no gradient flows
    # through pt (replaces the deprecated Variable(logpt.data.exp())).
    pt = logpt.detach().exp()

    if alpha is not None:
        if alpha.type() != input.type():
            alpha = alpha.type_as(input)
        at = alpha.gather(0, target.view(-1))
        logpt = logpt * at

    loss = -1 * (1 - pt) ** gamma * logpt
    if size_average:
        return loss.mean()
    return loss.sum()
209
+
210
+
211
class EarlyStopping:
    """Signal when the validation loss has stopped improving.

    Call the instance with each new validation loss. ``early_stop`` becomes
    True once the loss has exceeded the best value by more than
    ``min_delta`` on ``patience`` calls since the last improvement.
    """

    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = np.inf
        self.early_stop = False

    def __call__(self, validation_loss):
        if validation_loss < self.min_validation_loss:
            # New best: remember it and start counting again.
            self.min_validation_loss = validation_loss
            self.counter = 0
            self.early_stop = False

        elif validation_loss > (self.min_validation_loss + self.min_delta):
            # Clearly worse than the best loss. The leftover debug print
            # ('Less') was removed: it fired when the loss was higher.
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
231
+
232
+
233
+
234
+
235
+
236
def SoftmaxModified(x):
    """Apply softmax along the first dimension of ``x``.

    Equivalent to transposing dimensions 0 and 1, applying softmax over
    dimension 1 and transposing back, without the transposes and without
    building a new ``nn.Softmax`` module on every call.

    Args:
        x: Tensor whose first dimension is normalised.

    Returns:
        Tensor of the same shape as ``x`` whose values sum to 1 along
        dimension 0.
    """
    return torch.softmax(x, dim=0)
242
+
243
+
244
class MultiModalGMUAdapted(nn.Module):
    """Gated fusion of five input modalities into one hidden vector.

    Every modality has a bias-free linear projection followed by tanh (its
    hidden representation) and a second bias-free linear layer producing its
    gate scores. The gate scores are normalised across the five modalities
    with ``SoftmaxModified`` and used to weight the hidden representations.
    """

    def __init__(self, input_size_array, hidden_size, dropoutProbability):
        """Create the projection and gate layers for the five modalities."""
        super().__init__()
        self.input_size_array = input_size_array
        self.hidden_size = hidden_size
        self.dropout = nn.Dropout(dropoutProbability)

        # Attribute names and creation order are kept as they are: they
        # determine the state_dict keys and the order of weight initialisation.
        self.h_1_layer = nn.Linear(input_size_array[0], hidden_size, bias=False)
        self.h_2_layer = nn.Linear(input_size_array[1], hidden_size, bias=False)
        self.h_3_layer = nn.Linear(input_size_array[2], hidden_size, bias=False)
        self.h_4_layer = nn.Linear(input_size_array[3], hidden_size, bias=False)
        self.h_5_layer = nn.Linear(input_size_array[4], hidden_size, bias=False)

        self.z_1_layer = nn.Linear(input_size_array[0], hidden_size, bias=False)
        self.z_2_layer = nn.Linear(input_size_array[1], hidden_size, bias=False)
        self.z_3_layer = nn.Linear(input_size_array[2], hidden_size, bias=False)
        self.z_4_layer = nn.Linear(input_size_array[3], hidden_size, bias=False)
        self.z_5_layer = nn.Linear(input_size_array[4], hidden_size, bias=False)

    def forward(self, inputModalities):
        """Fuse the five tensors in ``inputModalities`` into one tensor."""
        h_layers = (self.h_1_layer, self.h_2_layer, self.h_3_layer,
                    self.h_4_layer, self.h_5_layer)
        z_layers = (self.z_1_layer, self.z_2_layer, self.z_3_layer,
                    self.z_4_layer, self.z_5_layer)

        # Hidden representations first, then gate scores, in modality order.
        hidden = [tanh(self.dropout(layer(modality)))
                  for layer, modality in zip(h_layers, inputModalities)]
        gates = [self.dropout(layer(modality))
                 for layer, modality in zip(z_layers, inputModalities)]

        z_normalized = SoftmaxModified(stack(gates))

        # Gate-weighted sum of the hidden representations.
        final = z_normalized[0] * hidden[0]
        for idx in range(1, 5):
            final = final + z_normalized[idx] * hidden[idx]
        return final
295
+
296
class MyCNN(nn.Module):
    """Relation classifier over three context windows and two entity vectors.

    Each context window passes through its own convolution / ReLU / max-pool
    branch, the five resulting vectors are fused by ``MultiModalGMUAdapted``
    and the fused vector is classified by a small fully connected head.
    """

    def __init__(self, num_classes=10, len_c1=7, len_c2=5, len_c3=11):
        super().__init__()

        def conv_branch(window_len):
            # A (3, 1) convolution shortens the window by 2 rows; pooling
            # over the remaining window_len - 2 rows leaves a single row.
            return nn.Sequential(
                nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(3,1)),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(window_len - 2,1)),
            )

        # One convolutional branch per context window.
        self.conv_layers1 = conv_branch(len_c1)
        self.conv_layers2 = conv_branch(len_c2)
        self.conv_layers3 = conv_branch(len_c3)

        self.multi_gmu = MultiModalGMUAdapted([1024,1024,1024,1024,1024], 1024, 0.5)

        self.fc_simple_layers_multi = nn.Sequential(
            nn.Linear(1024 , 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, c1, h1, c2, h2, c3):
        """Return the class logits for a batch."""
        # Context windows: convolution branch, then flatten per sample.
        c1 = torch.flatten(self.conv_layers1(c1), start_dim=1)
        c2 = torch.flatten(self.conv_layers2(c2), start_dim=1)
        c3 = torch.flatten(self.conv_layers3(c3), start_dim=1)

        # Entity vectors are only squashed with tanh.
        h1 = tanh(h1)
        h2 = tanh(h2)

        # Multi GMU fusion followed by the classification head.
        mgmu_out = self.multi_gmu([c1, h1, c2, h2, c3])
        return self.fc_simple_layers_multi(mgmu_out)
362
+
363
def define_model():
    """Create the module-level model, optimizer and loss function.

    Sets ``cnn`` to a fresh ``MyCNN``, ``optimizer`` to Adam (lr 0.001) over
    its parameters and ``criterion`` to the focal loss with gamma 0.7.
    """
    global cnn, optimizer, criterion

    def focal_criterion(pred, tar):
        return FocalLoss(input=pred, target=tar, gamma=0.7)

    cnn = MyCNN()
    optimizer = torch.optim.Adam(cnn.parameters(), lr=0.001)
    criterion = focal_criterion
371
+
372
def train_model(name, epocs, rel2id_path):
    """Train the module-level model for up to ``epocs`` epochs.

    Training stops early once ``EarlyStopping`` (patience 10, min_delta
    0.01) reports that the validation loss no longer improves.

    Args:
        name: Model name, forwarded to ``train_one_epoch``.
        epocs: Maximum number of epochs.
        rel2id_path: Forwarded to ``train_one_epoch``.
    """
    # Reset the module-level best loss for this run. The original assigned
    # a local of the same name, so the best loss of an earlier training in
    # the same process carried over and could block every checkpoint.
    global best_valid_loss
    best_valid_loss = np.inf

    max_epochs = epocs
    # Columns: train loss, validation loss, train accuracy, validation accuracy.
    running_loss = np.zeros(shape=(max_epochs, 4))
    early_stopping = EarlyStopping(patience=10, min_delta=0.01)

    for epoch in range(max_epochs):
        running_loss[epoch] = train_one_epoch(epoch, name, rel2id_path)
        early_stopping(running_loss[epoch, 1])
        print(f"Epoch {epoch} \t Train_loss = {running_loss[epoch, 0]:.4f} \t Valid_loss = {running_loss[epoch, 1]:.4f} \n\t\t\t Train_acc = {running_loss[epoch, 2]:.4f} \t Valid_acc = {running_loss[epoch, 3]:.4f}")
        if early_stopping.early_stop:
            print("We are at epoch:", epoch)
            break
384
+
385
+
386
def usage_cuda_rc(cuda):
    """Select the device for both this module and flair, and describe it.

    Args:
        cuda: Truthy to request the GPU ('cuda:0'); falsy to force the CPU.

    Returns:
        A human-readable message stating which device will be used.
    """
    global device
    if not cuda:
        device = torch.device('cpu')
        flair.device = device
        return 'CPU will be used'

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    flair.device = device
    if flair.device == torch.device('cpu'):
        # GPU was requested but is not available.
        return 'Error handling GPU, CPU will be used'
    elif flair.device == torch.device('cuda:0'):
        return 'GPU detected, GPU will be used'
397
+
398
+
399
def create_embbedings():
    """Load the XLM-RoBERTa word embeddings once and cache them module-wide."""
    global embeddings
    if embeddings:
        # Already loaded by an earlier call.
        return
    embeddings = TransformerWordEmbeddings(
        model='xlm-roberta-large',
        layers="-1",
        subtoken_pooling="first",
        fine_tune=True,
        use_context=True,
    )
409
+
410
+
411
+
412
+
413
def prepare_data(rel2id_path, path_data):
    """Embed the training sentences and build the train/validation loaders.

    ``path_data`` is read as tab-separated lines (token, entity label,
    relation) with a blank line after each sentence. The positions of the
    first entity label seen in a sentence are stored as head positions;
    positions with any other non-'O' label are stored as tail positions.
    Each sentence is embedded and zero-padded to 100 tokens of 1024 values.
    The result is stored in the module-level ``json_data``, wrapped in
    ``MyDataset``, split 90/10 and exposed through the module-level
    ``train_loader`` and ``val_loader`` (batch size 64).

    Args:
        rel2id_path: JSON file mapping relation names to integer ids.
        path_data: Path of the training file.
    """
    global json_data, train_loader, val_loader
    create_embbedings()

    with open(rel2id_path, mode='r') as f:
        rel2id = json.load(f)

    max_tokens, emb_dim = 100, 1024
    json_data = {'flat_emb':[], 'relation':[], 'h_pos':[], 't_pos':[]}

    def store_sentence(tokens, h_pos, t_pos, relation):
        """Embed one sentence and append it to ``json_data``."""
        sentence = Sentence(tokens)
        embeddings.embed(sentence)
        # Gather all pieces and concatenate once; growing the array with
        # np.hstack per token and per padding row was quadratic.
        pieces = [tk.embedding.detach().to('cpu').numpy() for tk in sentence]
        number_padding = max_tokens - len(sentence)
        if number_padding > 0:
            pieces.append(np.zeros(number_padding * emb_dim))
        json_data['flat_emb'].append(list(np.concatenate(pieces)))
        json_data['h_pos'].append(h_pos)
        json_data['t_pos'].append(t_pos)
        json_data['relation'].append(rel2id[relation])

    # NOTE(review): the relation of the previous sentence is reused when a
    # sentence carries no relation of its own, as in the original - confirm
    # this is intended.
    relation = None
    sentence_temp, h_pos, t_pos = [], [], []
    current_ent = ''
    cont = 0

    with open(path_data, mode='r', encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                fields = line.rstrip('\n').split('\t')
                sentence_temp.append(fields[0])

                if fields[1] != 'O':
                    if current_ent == '':
                        h_pos.append(cont)
                        current_ent = fields[1]
                    elif fields[1] == current_ent:
                        h_pos.append(cont)
                    else:
                        t_pos.append(cont)

                # NOTE(review): indentation was lost in the scraped source;
                # the relation column is read on every token that has one,
                # which covers both possible nestings of this check.
                if len(fields) > 2 and fields[2] != '-':
                    relation = fields[2]

                cont += 1

            elif sentence_temp:
                # Blank line: the sentence is complete. Consecutive blank
                # lines no longer produce empty sentences.
                store_sentence(sentence_temp, h_pos, t_pos, relation)
                sentence_temp, h_pos, t_pos = [], [], []
                current_ent = ''
                cont = 0

    # A file that does not end with a blank line used to lose its last
    # sentence.
    if sentence_temp:
        store_sentence(sentence_temp, h_pos, t_pos, relation)

    dataset = MyDataset()

    train_set_size = int(len(dataset) * 0.9)
    valid_set_size = len(dataset) - train_set_size

    train_dataset, val_dataset = random_split(dataset, [train_set_size, valid_set_size])
    del dataset

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=True)
507
+
508
def prepare_data_test(name, path_data):
    """Embed the sentences to classify and build the test loader.

    ``path_data`` is read as tab-separated lines (token, entity label, ...)
    with a blank line after each sentence. Token lists and entity-label
    lists are kept in the module-level ``test_sentences`` and ``entities``.
    Each sentence is embedded and zero-padded to 100 tokens of 1024 values,
    stored in the module-level ``json_data`` with the placeholder relation
    id 1, and exposed through the module-level ``test_loader`` (batch size
    64, not shuffled).

    Args:
        name: Not used by this function.
        path_data: Path of the file to classify.
    """
    global json_data, test_sentences, entities, test_loader
    create_embbedings()

    test_sentences = []
    entities = []

    max_tokens, emb_dim = 100, 1024
    json_data = {'flat_emb':[], 'relation':[], 'h_pos':[], 't_pos':[]}

    def store_sentence(tokens, labels, h_pos, t_pos):
        """Embed one sentence and append it to the module-level containers."""
        sentence = Sentence(tokens)
        test_sentences.append(tokens)
        entities.append(labels)
        embeddings.embed(sentence)
        # Gather all pieces and concatenate once; growing the array with
        # np.hstack per token and per padding row was quadratic.
        pieces = [tk.embedding.detach().to('cpu').numpy() for tk in sentence]
        number_padding = max_tokens - len(sentence)
        if number_padding > 0:
            pieces.append(np.zeros(number_padding * emb_dim))
        json_data['flat_emb'].append(list(np.concatenate(pieces)))
        json_data['h_pos'].append(h_pos)
        json_data['t_pos'].append(t_pos)
        # Placeholder target; the true relation is unknown at test time.
        json_data['relation'].append(1)

    sentence_temp, entities_temp, h_pos, t_pos = [], [], [], []
    current_ent = ''
    cont = 0

    with open(path_data, mode='r', encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                fields = line.rstrip('\n').split('\t')
                sentence_temp.append(fields[0])
                entities_temp.append(fields[1])
                if fields[1] != 'O':
                    if current_ent == '':
                        h_pos.append(cont)
                        current_ent = fields[1]
                    elif fields[1] == current_ent:
                        h_pos.append(cont)
                    else:
                        t_pos.append(cont)

                cont += 1

            elif sentence_temp:
                # Blank line: the sentence is complete. Consecutive blank
                # lines no longer produce empty sentences.
                store_sentence(sentence_temp, entities_temp, h_pos, t_pos)
                sentence_temp, entities_temp, h_pos, t_pos = [], [], [], []
                current_ent = ''
                cont = 0

    # A file that does not end with a blank line used to lose its last
    # sentence.
    if sentence_temp:
        store_sentence(sentence_temp, entities_temp, h_pos, t_pos)

    dataset = MyDataset()
    test_loader = DataLoader(dataset, batch_size=64, shuffle=False)

    del dataset
606
+
607
+
608
+
609
+
610
+ #------------------Backend functions----------------------------------------
611
+
612
def training_model_rc(name, path_data, rel2id_path, epochs=200):
    """Train a relation-classification model and store it under ``models/RC``.

    Parameters
    ----------
    name : str
        Name of the model; used as the folder name under ``models/RC``.
    path_data : str
        Path of the training file, forwarded to ``prepare_data``.
    rel2id_path : str
        Path of the relation-to-id JSON file; copied next to the model.
    epochs : int, optional
        Number of epochs forwarded to ``train_model``.

    Returns
    -------
    str or int
        A confirmation message on success, otherwise an error code:
        13 (model definition failed), 12 (data preparation failed) or
        7 (training failed).
    """
    global default_path
    default_path = os.path.dirname(os.path.abspath(__file__))
    default_path = default_path.replace('\\', '/')
    print(name)
    model_dir = default_path + '/../../models/RC/{}/'.format(name)

    # ``except Exception`` keeps the numeric error codes but no longer
    # swallows KeyboardInterrupt/SystemExit, and the cause is printed.
    try:
        define_model()
    except Exception as err:
        print('Error defining the model: {}'.format(err))
        return 13
    print('Model defined')
    check_create(model_dir)

    try:
        prepare_data(rel2id_path, path_data)
    except Exception as err:
        print('Error preparing the data: {}'.format(err))
        return 12
    print('Data prepared')

    try:
        train_model(name, epochs, rel2id_path)
    except Exception as err:
        print('Error training the model: {}'.format(err))
        return 7

    path_model = default_path + '/../../models/RC/{}/best_model.pt'.format(name)
    # Keep the label mapping next to the weights so the model can be used alone.
    shutil.copy(rel2id_path, default_path + '/../../models/RC/{}/rel2id.json'.format(name))

    return "model trined and saved at {}".format(path_model)
643
+
644
+
645
def use_model_rc(name, path_data, output_dir):
    """Predict the relation of every sentence in a file with a trained model.

    Parameters
    ----------
    name : str
        Name of the model folder under ``models/RC``.
    path_data : str
        Path of the token-per-line file to classify.
    output_dir : str
        Path of the JSON file where the results are written.

    Returns
    -------
    dict or int
        ``{'sentences': {'tokens': ..., 'entities': ...}, 'relations': ...}``
        on success, otherwise an error code: 10 (model files missing),
        9 (input is not a file), 13 (model definition failed), 1 (weights
        could not be loaded), 12 (data preparation failed) or 11 (output
        could not be written).
    """
    global default_path
    global cnn
    global test_loader
    global test_sentences
    global entities

    default_path = os.path.dirname(os.path.abspath(__file__))
    default_path = default_path.replace('\\', '/')

    # --------------Locate the trained model-------------------------
    path_model = default_path + '/../../models/RC/{}/best_model.pt'.format(name)
    rel2id_file = default_path + '/../../models/RC/{}/rel2id.json'.format(name)

    # Validate before opening anything so a missing model yields code 10
    # instead of an uncaught FileNotFoundError.
    if not (os.path.isfile(path_model) and os.path.isfile(rel2id_file)):
        print('Model does not exists')
        return 10
    print(path_data)
    if not os.path.isfile(path_data):
        print('Input file is not a file')
        return 9

    with open(rel2id_file, mode='r') as f:
        rel2id = json.load(f)
    # Invert the mapping; a dict also copes with non-contiguous ids.
    id2rel = {idx: rel for rel, idx in rel2id.items()}

    try:
        cnn = MyCNN()
    except Exception as err:
        print('Error defining the model: {}'.format(err))
        return 13
    print('Model defined')

    try:
        # The network is kept on the CPU below, so map GPU checkpoints to it.
        saved_model = torch.load(path_model, map_location='cpu')
        cnn.load_state_dict(saved_model['model_state_dict'])
    except Exception as err:
        print('Error loading the model: {}'.format(err))
        return 1
    # Inference mode: disables dropout so predictions are deterministic.
    cnn.eval()
    print('Model loaded')

    # -----------------Load the document-------------------------
    try:
        prepare_data_test(name, path_data)
    except Exception as err:
        print('Error preparing the data: {}'.format(err))
        return 12
    print('Data prepared')

    # -----------------Predict-------------------------
    batches = []
    with torch.no_grad():
        for c1, h1, c2, h2, c3, _targets in test_loader:
            logits = cnn(c1, h1, c2, h2, c3)
            batches.append(logits.argmax(dim=1).cpu().numpy())
    ypred = np.concatenate(batches) if batches else np.array([], dtype=np.int64)

    relations = [id2rel[int(rel)] for rel in ypred]
    print('prediction')

    # -----------------Tag the document-------------------------
    results = {'sentences': {'tokens': test_sentences, 'entities': entities},
               'relations': relations}

    # -----------------Save the results-------------------------
    try:
        with open(output_dir, "w", encoding='utf-8') as write_file:
            json.dump(results, write_file)

        print('-'*20,'Tagged complete','-'*20)
        print('Document tagged saved in {}'.format(output_dir))
    except Exception as err:
        print('Error in output file')
        print(err)
        return 11

    return results
src/scripts/model_rc.py ADDED
@@ -0,0 +1,466 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Thu May 4 11:19:59 2023
4
+
5
+ @author: gita
6
+ """
7
+
8
+ import numpy as np
9
+ import torch
10
+ import torch.nn as nn
11
+ from torch.utils.data import Dataset, DataLoader, random_split
12
+ import pandas as pd
13
+ import json
14
+ import os
15
+ import gc
16
+ from distutils.dir_util import copy_tree
17
+ import argparse
18
+ import flair
19
+ from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, TransformerWordEmbeddings
20
+ from torch import nn, tanh, sigmoid, relu, FloatTensor, rand, stack, optim, cuda, softmax, save, device, tensor, int64, no_grad, concat
21
+ from flair.data import Sentence
22
+
23
# Directory of this script; data and model paths are built relative to it.
default_path = os.path.dirname(os.path.abspath(__file__))

# Module-level state shared by the functions of this file.  ``0`` marks a
# slot that has not been initialised yet.
tagger_document = 0
embeddings = 0      # created on first use by create_embbedings()
json_data = 0       # filled by prepare_data(), consumed by MyDataset
train_loader = 0
val_loader = 0
cnn = 0
optimizer = 0
criterion = 0
# Default to the CPU: with the former value ``0`` a call such as
# ``tensor.to(device)`` addresses CUDA device 0 while the network stays on the
# CPU whenever usage_cuda_rc() has not been called.
device = torch.device('cpu')
# Lowest validation loss checkpointed so far; train_one_epoch() declares it
# ``global`` and therefore needs it to exist at module level.
best_valid_loss = np.inf
33
+
34
+
35
class MyDataset(Dataset):
    """Split every embedded sentence of ``json_data`` into five model inputs.

    For each sentence the 1024-dimensional token embeddings are cut around the
    head (``h_pos``) and tail (``t_pos``) token positions:

    * ``c1`` - tokens before the head, padded/truncated to ``len_c1`` rows
    * ``h1`` - mean embedding of the head span
    * ``c2`` - tokens between head and tail, padded/truncated to ``len_c2``
    * ``h2`` - mean embedding of the tail span
    * ``c3`` - tokens after the tail, padded/truncated to ``len_c3``

    The global ``json_data`` is deleted once the tensors are built in order to
    release memory.
    """

    def __init__(self, len_c1=7, len_c2=5, len_c3=11):
        global json_data

        def fix_tensor(tensor, size):
            """Zero-pad or truncate dimension 2 of ``tensor`` to ``size`` rows."""
            missing = size - tensor.shape[2]
            if missing > 0:
                filler = torch.zeros(1, 1, missing, 1024)
                tensor = torch.cat([tensor, filler], dim=2)
            return tensor[:, :, :size, :]

        def join(parts, empty_shape):
            """Concatenate once at the end instead of growing a tensor per item."""
            return torch.cat(parts, dim=0) if parts else torch.zeros(*empty_shape)

        heads = json_data['h_pos']
        tails = json_data['t_pos']
        n_sentences = len(json_data['flat_emb'])

        # The previous version read self.c1 ... self.targets before they were
        # ever assigned; the pieces are now gathered in local lists.
        c1, h1, c2, h2, c3 = [], [], [], [], []
        if n_sentences:
            data = torch.Tensor(json_data['flat_emb']).reshape((n_sentences, 1, -1, 1024))
            for n_sen in range(n_sentences):
                tokens = data[n_sen, 0]
                h_start, h_stop = heads[n_sen][0], heads[n_sen][-1] + 1
                t_start, t_stop = tails[n_sen][0], tails[n_sen][-1] + 1

                c1.append(fix_tensor(tokens[:h_start].unsqueeze(0).unsqueeze(0), len_c1))
                h1.append(tokens[h_start:h_stop].mean(dim=0).reshape((1, 1024)))
                c2.append(fix_tensor(tokens[h_stop:t_start].unsqueeze(0).unsqueeze(0), len_c2))
                h2.append(tokens[t_start:t_stop].mean(dim=0).reshape((1, 1024)))
                c3.append(fix_tensor(tokens[t_stop:].unsqueeze(0).unsqueeze(0), len_c3))
            del data

        self.c1 = join(c1, (0, 1, len_c1, 1024))
        self.h1 = join(h1, (0, 1024))
        self.c2 = join(c2, (0, 1, len_c2, 1024))
        self.h2 = join(h2, (0, 1024))
        self.c3 = join(c3, (0, 1, len_c3, 1024))
        self.targets = torch.Tensor(json_data['relation']).to(torch.int64)

        # The raw embeddings are large; drop them as soon as they are consumed.
        del json_data
        gc.collect()

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, index):
        return (self.c1[index], self.h1[index], self.c2[index],
                self.h2[index], self.c3[index], self.targets[index])
98
+
99
+
100
def update_step(c1, h1, c2, h2, c3, label):
    """Run one optimisation step on a batch.

    Uses the module-level ``cnn``, ``optimizer`` and ``criterion``.

    Returns
    -------
    tuple
        ``(loss, hits)``: the batch loss as a float and the number of samples
        of the batch whose predicted class equals ``label``.
    """
    global cnn
    global optimizer
    global criterion

    logits = cnn(c1, h1, c2, h2, c3)

    optimizer.zero_grad()
    batch_loss = criterion(logits, label)
    batch_loss.backward()
    optimizer.step()

    predicted = torch.softmax(logits, dim=1).detach().argmax(dim=1)
    hits = predicted.eq(label).type(torch.float).sum().item()
    return batch_loss.item(), hits
112
+
113
def evaluate_step(c1, h1, c2, h2, c3, label):
    """Score one batch without updating the model.

    Uses the module-level ``cnn`` and ``criterion``.  The forward pass runs
    under ``torch.no_grad()`` because no backward pass follows, so no autograd
    graph has to be recorded.

    Returns
    -------
    tuple
        ``(loss, hits)``: the batch loss as a float and the number of samples
        of the batch whose predicted class equals ``label``.
    """
    global cnn
    global optimizer
    global criterion
    with torch.no_grad():
        prediction = cnn(c1, h1, c2, h2, c3)
        loss = criterion(prediction, label)
        acc = (nn.Softmax(dim=1)(prediction).argmax(dim=1) == label).type(torch.float).sum().item()
    return loss.item(), acc
121
+
122
def train_one_epoch(epoch, model_path=None):
    """Train for one epoch, validate, and checkpoint the best model.

    Parameters
    ----------
    epoch : int
        Index of the current epoch.  A checkpoint is only considered on
        epochs that are multiples of 10.
    model_path : str, optional
        Where the checkpoint is written.  Defaults to
        ``default_path + '/../../RC/model/best_model.pt'``.

    Returns
    -------
    tuple
        ``(train_loss, valid_loss, train_acc, valid_acc)``, each one the
        accumulated per-batch value divided by the size of its dataset.
    """
    global train_loader
    global val_loader
    global device
    global best_valid_loss

    # Make sure the best-loss tracker exists even if nothing published it.
    globals().setdefault('best_valid_loss', np.inf)

    if model_path is None:
        # The former literal started with '/', i.e. it pointed at the root of
        # the filesystem; anchor it to this script like the other paths.
        model_path = default_path + '/../../RC/model/best_model.pt'

    if (device == torch.device('cuda:0')): cnn.cuda()
    train_loss, valid_loss, acc_train, acc_valid = 0.0, 0.0, 0.0, 0.0

    # Dropout must be active while training ...
    cnn.train()
    for c1, h1, c2, h2, c3, targets in train_loader:
        train_loss_temp, acc_train_temp = update_step(c1.to(device), h1.to(device), c2.to(device),
                                                      h2.to(device), c3.to(device), targets.to(device))
        train_loss += train_loss_temp
        acc_train += acc_train_temp

    # ... and switched off while measuring the validation loss.
    cnn.eval()
    with torch.no_grad():
        for c1, h1, c2, h2, c3, targets in val_loader:
            valid_loss_temp, acc_valid_temp = evaluate_step(c1.to(device), h1.to(device), c2.to(device),
                                                            h2.to(device), c3.to(device), targets.to(device))
            valid_loss += valid_loss_temp
            acc_valid += acc_valid_temp

    # Save the model if it is the best one so far.
    if epoch % 10 == 0:
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            os.makedirs(os.path.dirname(model_path), exist_ok=True)
            torch.save({'epoca': epoch,
                        'model_state_dict': cnn.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'loss': valid_loss},
                       model_path)

    return train_loss/len(train_loader.dataset), valid_loss/len(val_loader.dataset), acc_train/len(train_loader.dataset), acc_valid/len(val_loader.dataset)
148
+
149
+
150
def FocalLoss(input, target, gamma=0, alpha=None, size_average=True):
    """Compute the focal loss ``-(1 - pt) ** gamma * log(pt)``.

    Parameters
    ----------
    input : torch.Tensor
        Raw class scores, ``(N, C)`` or ``(N, C, ...)``; extra dimensions are
        folded into the batch dimension.
    target : torch.Tensor
        Integer class index of every sample.
    gamma : float, optional
        Focusing exponent; ``0`` reduces the loss to the cross entropy.
    alpha : torch.Tensor, optional
        One weight per class, applied to the log-probability of the target.
    size_average : bool, optional
        Return the mean over the samples if true, otherwise the sum.

    Returns
    -------
    torch.Tensor
        Scalar loss.
    """
    if input.dim() > 2:
        input = input.view(input.size(0), input.size(1), -1)  # N,C,H,W => N,C,H*W
        input = input.transpose(1, 2)                         # N,C,H*W => N,H*W,C
        input = input.contiguous().view(-1, input.size(2))    # N,H*W,C => N*H*W,C
    target = target.view(-1, 1)

    # The class dimension is given explicitly; relying on the implicit choice
    # is deprecated and emits a warning.
    logpt = nn.functional.log_softmax(input, dim=1)
    logpt = logpt.gather(1, target).view(-1)
    # The modulating factor is treated as a constant: no gradient flows
    # through pt (same as the former Variable(logpt.data.exp())).
    pt = logpt.detach().exp()

    if alpha is not None:
        if alpha.type() != input.data.type():
            alpha = alpha.type_as(input.data)
        at = alpha.gather(0, target.data.view(-1))
        logpt = logpt * at

    loss = -1 * (1 - pt) ** gamma * logpt
    if size_average:
        return loss.mean()
    return loss.sum()
172
+
173
+
174
class EarlyStopping:
    """Flag when the validation loss has stopped improving.

    Call the instance once per epoch with the validation loss.  Every call
    whose loss exceeds the best loss seen so far by more than ``min_delta``
    increases ``counter``; once ``counter`` reaches ``patience`` the attribute
    ``early_stop`` becomes true.  A new best loss resets both.
    """

    def __init__(self, patience=5, min_delta=0):
        self.early_stop = False
        self.counter = 0
        self.min_validation_loss = np.inf
        self.patience = patience
        self.min_delta = min_delta

    def __call__(self, validation_loss):
        if validation_loss < self.min_validation_loss:
            # New best value: start counting again.
            self.min_validation_loss = validation_loss
            self.counter = 0
            self.early_stop = False
            return

        tolerated = self.min_validation_loss + self.min_delta
        if not validation_loss > tolerated:
            # Inside the tolerance band: neither an improvement nor a strike.
            return

        print('Less')
        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True
194
+
195
+
196
+
197
+
198
+
199
def SoftmaxModified(x):
    """Apply a softmax along the first dimension of ``x``.

    The first two dimensions are swapped, the softmax is taken over what is
    then dimension 1, and the result is swapped back, so the values along
    dimension 0 of the returned tensor sum to one.
    """
    swapped = x.transpose(0, 1)
    normalised = swapped.softmax(dim=1)
    return normalised.transpose(0, 1)
205
+
206
+
207
class MultiModalGMUAdapted(nn.Module):
    """Gated fusion of five input vectors into one hidden vector.

    Each input has its own bias-free projection to the hidden size
    (``h_<k>_layer``) and its own gate projection (``z_<k>_layer``).  The gate
    scores of the five inputs are normalised against each other with a softmax
    and used to weight the ``tanh`` of the five projections.
    """

    N_MODALITIES = 5

    def __init__(self, input_size_array, hidden_size, dropoutProbability):
        """Initialize params."""
        super().__init__()
        self.input_size_array = input_size_array
        self.hidden_size = hidden_size
        self.dropout = nn.Dropout(dropoutProbability)

        # Registered as h_1_layer ... h_5_layer followed by z_1_layer ...
        # z_5_layer, so parameter names and creation order stay unchanged.
        for prefix in ('h', 'z'):
            for idx in range(self.N_MODALITIES):
                projection = nn.Linear(input_size_array[idx], hidden_size, bias=False)
                setattr(self, '{}_{}_layer'.format(prefix, idx + 1), projection)

    def forward(self, inputModalities):
        """Propagate input through the network."""

        def project(prefix, idx):
            layer = getattr(self, '{}_{}_layer'.format(prefix, idx + 1))
            return self.dropout(layer(inputModalities[idx]))

        hidden = [tanh(project('h', idx)) for idx in range(self.N_MODALITIES)]
        gate_scores = stack([project('z', idx) for idx in range(self.N_MODALITIES)])
        gates = SoftmaxModified(gate_scores)

        final = gates[0] * hidden[0]
        for idx in range(1, self.N_MODALITIES):
            final = final + gates[idx] * hidden[idx]
        return final
258
+
259
class MyCNN(nn.Module):
    """Relation classifier over three context windows and two entity vectors.

    Each context window (``c1``, ``c2``, ``c3``) goes through its own
    convolution + ReLU + max-pooling stack that reduces it to one
    1024-dimensional vector.  These vectors and the ``tanh`` of the two entity
    vectors (``h1``, ``h2``) are fused by ``MultiModalGMUAdapted`` and
    classified by a small fully connected head.

    Parameters
    ----------
    num_classes : int
        Number of output classes.
    len_c1, len_c2, len_c3 : int
        Number of token rows of the three context windows.
    """

    def __init__(self, num_classes=10, len_c1=7, len_c2=5, len_c3=11):
        super().__init__()

        def context_encoder(length):
            # The (3, 1) convolution leaves ``length - 2`` rows; pooling over
            # all of them collapses the window to a single row.
            return nn.Sequential(
                nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(3, 1)),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(length - 2, 1)),
            )

        # Define convolutional layers
        self.conv_layers1 = context_encoder(len_c1)
        self.conv_layers2 = context_encoder(len_c2)
        self.conv_layers3 = context_encoder(len_c3)

        self.multi_gmu = MultiModalGMUAdapted([1024, 1024, 1024, 1024, 1024], 1024, 0.5)

        self.fc_simple_layers_multi = nn.Sequential(
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, c1, h1, c2, h2, c3):
        """Return the class scores of a batch."""
        # Pass the context windows through the convolutional layers
        c1 = torch.flatten(self.conv_layers1(c1), start_dim=1)
        c2 = torch.flatten(self.conv_layers2(c2), start_dim=1)
        c3 = torch.flatten(self.conv_layers3(c3), start_dim=1)

        h1 = torch.tanh(h1)
        h2 = torch.tanh(h2)

        # Multi GMU.  The fusion layer returns a single tensor; unpacking it
        # into two names split the batch dimension instead.  A
        # (output, weights) tuple is still accepted.
        fused = self.multi_gmu([c1, h1, c2, h2, c3])
        if isinstance(fused, tuple):
            fused = fused[0]

        # Return final output
        return self.fc_simple_layers_multi(fused)
326
+
327
def define_model():
    """Create the network, its optimizer and its loss as module globals.

    Sets ``cnn`` to a fresh ``MyCNN``, ``optimizer`` to Adam (learning rate
    0.001) over its parameters and ``criterion`` to the focal loss with
    ``gamma=0.7``.
    """
    global cnn
    global optimizer
    global criterion

    def focal_criterion(pred, tar):
        return FocalLoss(input=pred, target=tar, gamma=0.7)

    network = MyCNN()
    cnn = network
    optimizer = torch.optim.Adam(network.parameters(), lr=0.001)
    criterion = focal_criterion
335
+
336
def train_model(max_epochs=200):
    """Train the module-level model with early stopping.

    Parameters
    ----------
    max_epochs : int, optional
        Upper bound on the number of epochs.

    Returns
    -------
    numpy.ndarray
        One row per completed epoch with the four values returned by
        ``train_one_epoch`` (train loss, validation loss, train accuracy,
        validation accuracy).
    """
    # train_one_epoch() reads and updates ``best_valid_loss`` as a global, so
    # the starting value has to be published at module level, not kept local.
    global best_valid_loss
    best_valid_loss = np.inf

    running_loss = np.zeros(shape=(max_epochs, 4))
    early_stopping = EarlyStopping(patience=10, min_delta=0.01)

    epochs_run = 0
    for epoch in range(max_epochs):
        running_loss[epoch] = train_one_epoch(epoch)
        epochs_run = epoch + 1
        early_stopping(running_loss[epoch, 1])
        print(f"Epoch {epoch} \t Train_loss = {running_loss[epoch, 0]:.4f} \t Valid_loss = {running_loss[epoch, 1]:.4f} \n\t\t\t Train_acc = {running_loss[epoch, 2]:.4f} \t Valid_acc = {running_loss[epoch, 3]:.4f}")
        if early_stopping.early_stop:
            print("We are at epoch:", epoch)
            break

    return running_loss[:epochs_run]
348
+
349
+
350
def usage_cuda_rc(cuda):
    """Select the compute device for this module and for flair.

    Parameters
    ----------
    cuda : bool
        Whether the GPU should be used if one is available.

    Returns
    -------
    str
        Message describing which device will be used.
    """
    global device

    if not cuda:
        device = torch.device('cpu')
        flair.device = device
        return 'CPU will be used'

    wanted = "cuda:0" if torch.cuda.is_available() else "cpu"
    device = torch.device(wanted)
    flair.device = device

    if flair.device == torch.device('cpu'):
        return 'Error handling GPU, CPU will be used'
    elif flair.device == torch.device('cuda:0'):
        return 'GPU detected, GPU will be used'
361
+
362
+
363
def create_embbedings():
    """Create the shared ``xlm-roberta-large`` embedder on first use.

    Nothing happens when the global ``embeddings`` already holds a truthy
    value, so the model is only instantiated once.
    """
    global embeddings
    if embeddings:
        return

    settings = dict(
        model='xlm-roberta-large',
        layers="-1",
        subtoken_pooling="first",
        fine_tune=True,
        use_context=True,
    )
    embeddings = TransformerWordEmbeddings(**settings)
373
+
374
+
375
+
376
+
377
def prepare_data():
    """Embed the training file and build the train/validation loaders.

    Reads ``rel2id.json`` and ``train.txt`` from ``data/RC`` (relative to this
    script).  ``train.txt`` holds one ``token<TAB>entity<TAB>relation`` record
    per line and a blank line closes each sentence.  The first entity label
    met in a sentence marks the head tokens, any other non-``O`` label marks
    the tail tokens, and a relation value other than ``-`` names the relation.
    Every sentence is embedded, flattened and zero-padded up to 100 tokens.

    Returns
    -------
    None.  Sets the globals ``json_data``, ``train_loader`` and
    ``val_loader`` (90 % / 10 % random split).

    Raises
    ------
    ValueError
        If a sentence is closed before any relation has been read.
    """
    global embeddings
    global json_data
    global train_loader
    global val_loader

    create_embbedings()

    path_files = default_path + '/../../data/RC/'
    with open(path_files + 'rel2id.json', mode='r') as f:
        rel2id = json.load(f)
    path_data = path_files + "train.txt"

    max_tokens = 100
    padding = np.zeros(1024)
    json_data = {'flat_emb': [], 'relation': [], 'h_pos': [], 't_pos': []}

    def store_sentence(tokens, h_pos, t_pos, relation):
        """Embed one sentence and append it to ``json_data``."""
        if relation is None:
            raise ValueError('Sentence without relation label in {}'.format(path_data))
        sentence = Sentence(tokens)
        embeddings.embed(sentence)

        # One concatenate instead of a growing chain of np.hstack calls.
        vectors = [tk.embedding.detach().to('cpu').numpy() for tk in sentence]
        vectors.extend([padding] * (max_tokens - len(sentence)))

        json_data['flat_emb'].append(np.concatenate(vectors).tolist())
        json_data['h_pos'].append(h_pos)
        json_data['t_pos'].append(t_pos)
        json_data['relation'].append(rel2id[relation])

    tokens, h_pos, t_pos = [], [], []
    current_ent = ''
    # NOTE(review): as before, the relation is not reset between sentences, so
    # a sentence without its own relation reuses the previous one - confirm
    # whether that is intended.
    relation = None

    with open(path_data, mode='r', encoding='utf-8') as f:
        for line in f:
            line = line.rstrip('\r\n')
            if line.strip():
                fields = line.split('\t')
                position = len(tokens)
                tokens.append(fields[0])

                if fields[1] != 'O':
                    if current_ent == '':
                        h_pos.append(position)
                        current_ent = fields[1]
                    elif fields[1] == current_ent:
                        h_pos.append(position)
                    else:
                        t_pos.append(position)

                if fields[2] != '-':
                    relation = fields[2]
            else:
                if tokens:
                    store_sentence(tokens, h_pos, t_pos, relation)
                tokens, h_pos, t_pos = [], [], []
                current_ent = ''

    # A file that does not end with a blank line still has a pending sentence.
    if tokens:
        store_sentence(tokens, h_pos, t_pos, relation)

    dataset = MyDataset()

    train_set_size = int(len(dataset) * 0.9)
    valid_set_size = len(dataset) - train_set_size

    train_dataset, val_dataset = random_split(dataset, [train_set_size, valid_set_size])
    del dataset

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=True)
src/scripts/upsampling.py ADDED
@@ -0,0 +1,517 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Tue Oct 11 16:31:58 2022
4
+
5
+ @author: gita
6
+ """
7
+ import random
8
+ import numpy as np
9
+ import copy
10
+
11
+ class upsampling_ner:
12
+
13
+
14
+
15
def __init__(self, path_data, entities, pos_labels):
    """
    Store the configuration and load the dataset.

    Parameters
    ----------
    path_data : str
        Path of the dataset in format CONLL.
    entities : List
        Entity labels; one bucket of mentions and one bucket of tokens is
        created for each of them when the dataset is loaded.
    pos_labels : Dict
        Dictionary where the keys are the kind of labels, and the values
        are the position of the labels in one line. The key 'ner' is the
        one read when loading the dataset.

    Returns
    -------
    None.

    """
    # Upper bound on the retries used when drawing a different replacement.
    self.__search_factor = 1000
    self.__pos_labels = pos_labels
    self.__entities = entities
    self.__path_data = path_data
    # Must run last: it reads the attributes assigned above.
    self.__get_data_variables()
39
+
40
def __get_data_variables(self):
    """
    Takes the data path and turn the sentences into a matrix of shape
    (Sentences, tokens of each sentence), with a parallel matrix of labels.
    Lines are split on single spaces; the token is the first field and the
    label is the field at position pos_labels['ner']. An empty line closes
    the current sentence.
    Also executes the __get_total_mentions_and_tokens.

    Returns
    -------
    None.

    """
    col = self.__pos_labels['ner']
    self.__dataset = []
    self.__data_labels = []
    data_temp = []
    labels_temp = []
    with open(self.__path_data, mode='r', encoding='utf-8') as f:
        for line in f:
            # Remove the line ending before splitting. Chopping the last
            # character of the label instead damaged labels that were not
            # in the last column or on a final line without newline.
            line = line.rstrip('\r\n')
            if line:
                fields = line.split(' ')
                data_temp.append(fields[0])
                labels_temp.append(fields[col])
            else:
                self.__dataset.append(data_temp)
                self.__data_labels.append(labels_temp)
                data_temp = []
                labels_temp = []
    # Keep the last sentence when the file does not end with an empty line.
    if data_temp:
        self.__dataset.append(data_temp)
        self.__data_labels.append(labels_temp)
    self.__get_total_mentions_and_tokens()
68
+
69
+
70
def get_mentions(self, sentence, labels):
    """
    Divide sentence to a dictionary of mentions and a dictionary of labels
    of the mentions. A mention is a maximal run of consecutive tokens that
    share the same label.


    Parameters
    ----------
    sentence : List
        List of the tokens of the sentence.
    labels : List
        List of the labels of each token.

    Returns
    -------
    dict_mentions : Dictionary
        sentence divided by its entities mentions key=number of mention,
        value= list of tokens in the mention.
    dict_label_mentions : Dictionary
        labels corresponding of the mentions in the same order as token
        mentions. key= number of mention, value= label of the mention.
        Both dictionaries are empty for an empty sentence.

    """
    # An empty sentence has no mentions (it used to raise IndexError).
    if not sentence or not labels:
        return {}, {}

    mention = 0
    dict_mentions = {mention: [sentence[0]]}
    dict_label_mentions = {mention: labels[0]}

    for position in range(1, len(labels)):
        if labels[position] == labels[position - 1]:
            dict_mentions[mention].append(sentence[position])
        else:
            # The label changed: open a new mention.
            mention += 1
            dict_mentions[mention] = [sentence[position]]
            dict_label_mentions[mention] = labels[position]

    return dict_mentions, dict_label_mentions
110
+
111
+
112
def __get_total_mentions_and_tokens(self):
    """
    Takes the dataset and divide each sentence in mentions and it store it
    in __all_mentions (unique mentions per label, in order of appearance).
    It also stores every token under its label in __tokens_per_entity
    (repetitions are kept).

    Returns
    -------
    None.

    """
    self.__all_mentions = {}
    self.__tokens_per_entity = {}
    # Mentions already stored for each label, as tuples, so the duplicate
    # check is a set lookup instead of a scan over the whole list.
    seen = {}

    for key in self.__entities:
        self.__all_mentions[key] = []
        self.__tokens_per_entity[key] = []
        seen[key] = set()

    for sentence, sentence_labels in zip(self.__dataset, self.__data_labels):
        if not sentence:
            continue
        for word, label in zip(sentence, sentence_labels):
            self.__tokens_per_entity[label].append(word)

        mentions, label_mentions = self.get_mentions(sentence, sentence_labels)
        for n, label in enumerate(label_mentions.values()):
            signature = tuple(mentions[n])
            if signature not in seen[label]:
                seen[label].add(signature)
                self.__all_mentions[label].append(mentions[n])
138
+
139
+
140
def get_mentions_dict(self):
    """Return the dictionary with the mentions of the dataset, keyed by label."""
    all_mentions = self.__all_mentions
    return all_mentions
143
+
144
+
145
def get_dataset(self):
    """Return the sentences of the dataset and their labels, in that order."""
    sentences = self.__dataset
    sentence_labels = self.__data_labels
    return sentences, sentence_labels
148
+
149
+
150
def Label_wise_token_replacement(self, token_mentions, label_mentions, labels, p):
    """
    Do the label wise token replacement to a sentence divided in mentions:
    each token of a selected mention may be swapped for another token of
    the dataset that carries the same label.


    Parameters
    ----------
    token_mentions : Dictionary
        sentence divided by its entities mentions key=number of mention,
        value= list of tokens in the mention. It is modified in place.
    label_mentions : Dictionary
        labels corresponding of the mentions in the same order as token
        mentions. key= number of mention, value= label of the mention
    labels : List
        list of entities to be upsampled.
    p : float
        probability of replacing each token of a selected mention.

    Returns
    -------
    token_mentions : Dictionary
        token mentions but with the tokens replaced.

    """
    threshold = 1 - p
    for key, mention_tokens in token_mentions.items():
        if label_mentions[key] not in labels:
            continue
        for j, token in enumerate(mention_tokens):
            draw = np.random.uniform(0, 1)
            if not draw >= threshold:
                continue
            candidate = random.choice(self.__tokens_per_entity[label_mentions[key]])
            attempts = 0
            # Draw again, a bounded number of times, to get a different token.
            while candidate == token and attempts <= self.__search_factor:
                candidate = random.choice(self.__tokens_per_entity[label_mentions[key]])
                attempts += 1
            mention_tokens[j] = candidate

    return token_mentions
189
+
190
def synonym_replacement(self, token_mentions, label_mentions, labels, p):
    """
    Do the synonym replacement to a sentence divided in mentions: each
    token of a selected mention may be swapped for a synonym looked up on
    wordreference.com. A token is left untouched when the lookup fails or
    returns nothing.


    Parameters
    ----------
    token_mentions : Dictionary
        sentence divided by its entities mentions key=number of mention,
        value= list of tokens in the mention. It is modified in place.
    label_mentions : Dictionary
        labels corresponding of the mentions in the same order as token
        mentions. key= number of mention, value= label of the mention
    labels : List
        list of entities to be upsampled.
    p : float
        probability of replacing each token of a selected mention.

    Returns
    -------
    token_mentions : Dictionary
        token mentions but with synonyms.

    """

    import requests
    from bs4 import BeautifulSoup
    url='http://www.wordreference.com/sinonimos/'

    p = 1-p

    for i in token_mentions.keys():
        if label_mentions[i] in labels:
            for j,token in enumerate(token_mentions[i]):
                umbral=np.random.uniform(0,1)
                if umbral>=p:
                    try:
                        # The request is part of the best-effort block and
                        # has a timeout, so an unreachable site neither
                        # aborts nor stalls the whole upsampling.
                        resp=requests.get(url+token, timeout=10)
                        bs=BeautifulSoup(resp.text,'lxml')
                        lista=bs.find(class_='trans clickable')
                        sino=lista.find('li')
                        list_synonyms = sino.next_element.split(', ')
                    except Exception:
                        list_synonyms = False
                    if list_synonyms:
                        synonym_selected = random.choice(list_synonyms)
                        search = 0
                        # Draw again, a bounded number of times, to get a
                        # synonym different from the current token.
                        while synonym_selected == token_mentions[i][j] and search <= self.__search_factor:
                            synonym_selected = random.choice(list_synonyms)
                            search += 1
                        token_mentions[i][j] = synonym_selected

    return token_mentions
246
+
247
+
248
+
249
def mention_replacement(self, token_mentions, label_mentions, labels, p):
    """
    Do the mentions replacement to a sentence divided in mentions: a
    selected mention may be swapped for another mention of the dataset
    that carries the same label.


    Parameters
    ----------
    token_mentions : Dictionary
        sentence divided by its entities mentions key=number of mention,
        value= list of tokens in the mention. It is modified in place.
    label_mentions : Dictionary
        labels corresponding of the mentions in the same order as token
        mentions. key= number of mention, value= label of the mention
    labels : List
        list of entities to be upsampled.
    p : float
        probability of replacing a selected mention.

    Returns
    -------
    token_mentions : Dictionary
        token mentions but with mention replacement.

    """

    p = 1-p
    for i in token_mentions.keys():
        if label_mentions[i] in labels:
            umbral=np.random.uniform(0,1)
            if umbral>=p:
                set_of_mentions = self.__all_mentions[label_mentions[i]]
                mention_selected = random.choice(set_of_mentions)
                search = 0
                # Draw again, a bounded number of times, to get a mention
                # different from the current one.
                while token_mentions[i] == mention_selected and search <= self.__search_factor:
                    mention_selected = random.choice(set_of_mentions)
                    search += 1
                # Store a copy: handing out the list kept in __all_mentions
                # would let later in-place edits of the sentence corrupt
                # the shared pool of mentions.
                token_mentions[i] = list(mention_selected)
    return token_mentions
287
+
288
+
289
+
290
def shuffle_within_segments(self, token_mentions, label_mentions, labels, p):
    """
    Do the shuffle within segments to a sentence divided in mentions: the
    tokens of a selected mention may be reordered at random.


    Parameters
    ----------
    token_mentions : Dictionary
        sentence divided by its entities mentions key=number of mention,
        value= list of tokens in the mention. It is modified in place.
    label_mentions : Dictionary
        labels corresponding of the mentions in the same order as token
        mentions. key= number of mention, value= label of the mention
    labels : List
        list of entities to be upsampled.
    p : float
        probability of shuffling a selected mention.

    Returns
    -------
    token_mentions : Dictionary
        token mentions but with shuffled tokens.

    """
    threshold = 1 - p
    for key, mention_tokens in token_mentions.items():
        if label_mentions[key] not in labels:
            continue
        draw = np.random.uniform(0, 1)
        if draw >= threshold:
            random.shuffle(mention_tokens)
    return token_mentions
321
+
322
def mention_back_traslation(self, token_mentions, label_mentions, labels, p):
    """
    Do the back translation to each mention in a sentence divided in
    mentions: a selected mention may be translated to a random pivot
    language and back, then tokenized again. A mention is left untouched
    when the translation fails.


    Parameters
    ----------
    token_mentions : Dictionary
        sentence divided by its entities mentions key=number of mention,
        value= list of tokens in the mention. It is modified in place.
    label_mentions : Dictionary
        labels corresponding of the mentions in the same order as token
        mentions. key= number of mention, value= label of the mention
    labels : List
        list of entities to be upsampled.
    p : float
        probability of back translating a selected mention.

    Returns
    -------
    token_mentions : Dictionary
        token mentions but with mention back translation.

    """

    from deep_translator import GoogleTranslator
    from nltk.tokenize import word_tokenize


    p = 1-p
    for i in token_mentions.keys():
        if label_mentions[i] in labels:
            umbral=np.random.uniform(0,1)
            if umbral>=p:
                try:
                    language = random.choice(['en', 'sv', 'fr', 'ja', 'ko', 'af', 'sq', 'cs', 'es', 'el', 'ga'])
                    to_translate = " ".join(token_mentions[i])

                    translateden = GoogleTranslator(source='auto', target=language).translate(to_translate)

                    # NOTE(review): the text is translated back to 'de';
                    # confirm that this is the language of the dataset.
                    translatedes = GoogleTranslator(source='auto', target='de').translate(translateden)

                    mention_selected = word_tokenize(translatedes)
                    token_mentions[i] = mention_selected
                except Exception:
                    # Best effort: keep the original mention when the
                    # translation service or the tokenizer fails, but let
                    # KeyboardInterrupt/SystemExit through.
                    pass
    return token_mentions
375
+
376
+
377
def upsampling(self, labels, p, methods=None):
    """
    Generate augmented sentences for the whole dataset.

    Every non-empty sentence is split into mentions and each requested
    augmentation method is applied to a deep copy of those mentions. A
    result is kept only if it differs from the original mentions and from
    every result kept so far.

    Parameters
    ----------
    labels : List
        Entities to be upsampled.
    p : float
        Probability passed to each augmentation method.
    methods : container of str, optional
        Augmentation methods to apply, among "SiS", "LwTR", "MR", "SR" and
        "MBT". When None, nothing is generated.

    Returns
    -------
    new_samples_generated : List
        Generated sentences, each one a flat list of tokens.
    new_labels_generated : List
        Labels of the generated sentences, one label per token.
        Both lists are empty when ``methods`` is None.

    """
    if methods is None:
        print("Not upsampling required")
        # Return the documented shape so callers can always unpack.
        return [], []

    # Method tag -> name of the augmentation routine, in application order.
    # Resolved lazily so only the requested routines are looked up.
    augmenters = (
        ("SiS", "shuffle_within_segments"),
        ("LwTR", "Label_wise_token_replacement"),
        ("MR", "mention_replacement"),
        ("SR", "synonym_replacement"),
        ("MBT", "mention_back_traslation"),
    )

    accepted_mentions = []
    new_samples_generated = []
    new_labels_generated = []

    for i, sentence in enumerate(self.__dataset):
        if not sentence:
            continue
        sentence_mentions, label_mentions = self.get_mentions(sentence, self.__data_labels[i])

        for tag, method_name in augmenters:
            if tag not in methods:
                continue
            augment = getattr(self, method_name)
            candidate = augment(copy.deepcopy(sentence_mentions), label_mentions, labels, p)
            # Cheap comparison first; the scan over accepted results is O(n).
            if candidate == sentence_mentions or candidate in accepted_mentions:
                continue
            accepted_mentions.append(candidate)

            # Turn the mentions back into a flat sentence.
            sample, sample_labels = self.mention_to_sentence(candidate, label_mentions)
            new_samples_generated.append(sample)
            new_labels_generated.append(sample_labels)

    return new_samples_generated, new_labels_generated
442
+
443
+
444
+
445
def mention_to_sentence(self, mentions, labels):
    """
    Flatten a sentence divided in mentions into token and label lists.

    Parameters
    ----------
    mentions : Dictionary
        key = number of the mention, value = list of tokens in the mention.
    labels : Dictionary
        key = number of the mention, value = label of the mention.

    Returns
    -------
    tokens : List
        Tokens of all mentions, concatenated in dictionary order.
    token_labels : List
        One label per token: the label of each mention repeated once for
        every token of that mention.

    """
    tokens, token_labels = [], []
    for mention_id, mention_tokens in mentions.items():
        tokens.extend(mention_tokens)
        token_labels.extend([labels[mention_id]] * len(mention_tokens))
    return tokens, token_labels
453
+
454
+
455
+
456
def upsampling_by_sentence(self, labels, p, methods=None):
    """
    Generate augmented sentences grouped by the sentence they come from.

    For every sentence a dictionary is built that maps "Original" to the
    unmodified sentence and each method tag to its augmented version. A
    method is recorded only when its result differs from the original
    mentions, and a sentence is reported only when at least one method
    produced a variant.

    Parameters
    ----------
    labels : List
        Entities to be upsampled.
    p : float
        Probability passed to each augmentation method.
    methods : container of str, optional
        Augmentation methods to apply, among "SiS", "LwTR", "MR", "SR" and
        "MBT". When None, nothing is generated.

    Returns
    -------
    sentences_upsampled : List
        One dictionary per reported sentence: key = "Original" or method
        tag, value = list of tokens.
    labels_upsampled : List
        Dictionaries with the same keys as above holding the labels, one
        label per token. Both lists are empty when ``methods`` is None.

    """
    if methods is None:
        print("Not upsampling required")
        # Return the documented shape so callers can always unpack.
        return [], []

    # Method tag -> name of the augmentation routine, in application order.
    # Resolved lazily so only the requested routines are looked up.
    augmenters = (
        ("SiS", "shuffle_within_segments"),
        ("LwTR", "Label_wise_token_replacement"),
        ("MR", "mention_replacement"),
        ("SR", "synonym_replacement"),
        ("MBT", "mention_back_traslation"),
    )

    sentences_upsampled = []
    labels_upsampled = []

    for i, sentence in enumerate(self.__dataset):
        original_labels = self.__data_labels[i]
        sentence_variants = {"Original": sentence}
        label_variants = {"Original": original_labels}

        sentence_mentions, label_mentions = self.get_mentions(sentence, original_labels)

        for tag, method_name in augmenters:
            if tag not in methods:
                continue
            augment = getattr(self, method_name)
            candidate = augment(copy.deepcopy(sentence_mentions), label_mentions, labels, p)
            # Only the comparison with the original is meaningful here: the
            # cross-sentence duplicate list this method used to consult was
            # never filled, so that check always passed.
            if candidate != sentence_mentions:
                sentence_variants[tag], label_variants[tag] = self.mention_to_sentence(candidate, label_mentions)

        # Report the sentence only if some method changed it.
        if len(sentence_variants) > 1:
            sentences_upsampled.append(sentence_variants)
            labels_upsampled.append(label_variants)

    return sentences_upsampled, labels_upsampled
516
+
517
+