browndw committed on
Commit
bf34931
1 Parent(s): 22ab756

Update spaCy pipeline

Browse files
Files changed (9) hide show
  1. README.md +12 -12
  2. config.cfg +1 -1
  3. en_docusco_spacy-any-py3-none-any.whl +2 -2
  4. meta.json +118 -118
  5. ner/model +1 -1
  6. ner/moves +1 -1
  7. tagger/model +1 -1
  8. tok2vec/model +1 -1
  9. vocab/strings.json +2 -2
README.md CHANGED
@@ -14,27 +14,27 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.7897948078
18
  - name: NER Recall
19
  type: recall
20
- value: 0.7904761222
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.7901353181
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
- value: 0.9422182346
31
  ---
32
  English pipeline for part-of-speech and rhetorical tagging.
33
 
34
  | Feature | Description |
35
  | --- | --- |
36
  | **Name** | `en_docusco_spacy` |
37
- | **Version** | `1.2` |
38
  | **spaCy** | `>=3.5.0,<3.6.0` |
39
  | **Default Pipeline** | `tok2vec`, `tagger`, `ner` |
40
  | **Components** | `tok2vec`, `tagger`, `ner` |
@@ -60,10 +60,10 @@ English pipeline for part-of-speech and rhetorical tagging.
60
 
61
  | Type | Score |
62
  | --- | --- |
63
- | `TAG_ACC` | 94.22 |
64
- | `ENTS_F` | 79.01 |
65
- | `ENTS_P` | 78.98 |
66
- | `ENTS_R` | 79.05 |
67
- | `TOK2VEC_LOSS` | 18044765.75 |
68
- | `TAGGER_LOSS` | 2400921.93 |
69
- | `NER_LOSS` | 5935848.19 |
 
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.798987704
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.7954112218
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.7971954516
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
+ value: 0.9698599662
31
  ---
32
  English pipeline for part-of-speech and rhetorical tagging.
33
 
34
  | Feature | Description |
35
  | --- | --- |
36
  | **Name** | `en_docusco_spacy` |
37
+ | **Version** | `1.3` |
38
  | **spaCy** | `>=3.5.0,<3.6.0` |
39
  | **Default Pipeline** | `tok2vec`, `tagger`, `ner` |
40
  | **Components** | `tok2vec`, `tagger`, `ner` |
 
60
 
61
  | Type | Score |
62
  | --- | --- |
63
+ | `TAG_ACC` | 96.99 |
64
+ | `ENTS_F` | 79.72 |
65
+ | `ENTS_P` | 79.90 |
66
+ | `ENTS_R` | 79.54 |
67
+ | `TOK2VEC_LOSS` | 20924847.53 |
68
+ | `TAGGER_LOSS` | 1316790.55 |
69
+ | `NER_LOSS` | 5818469.98 |
config.cfg CHANGED
@@ -104,7 +104,7 @@ dropout = 0.1
104
  accumulate_gradient = 1
105
  patience = 1600
106
  max_epochs = 0
107
- max_steps = 35000
108
  eval_frequency = 250
109
  frozen_components = []
110
  annotating_components = []
 
104
  accumulate_gradient = 1
105
  patience = 1600
106
  max_epochs = 0
107
+ max_steps = 40000
108
  eval_frequency = 250
109
  frozen_components = []
110
  annotating_components = []
en_docusco_spacy-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d73aad65d669783246161c1be36f0be9001ee88c4b1fddce57311c8a8bc5030
3
- size 7502026
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d5b44d15d93b41b27c0650f71553ece8709eb2717910922fb129809e9423d54
3
+ size 7501545
meta.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "lang":"en",
3
  "name":"docusco_spacy",
4
- "version":"1.2",
5
  "description":"English pipeline for part-of-speech and rhetorical tagging.",
6
  "author":"David Brown",
7
  "email":"dwb2@andrew.cmu.edu",
@@ -346,195 +346,195 @@
346
 
347
  ],
348
  "performance":{
349
- "tag_acc":0.9422182346,
350
- "ents_f":0.7901353181,
351
- "ents_p":0.7897948078,
352
- "ents_r":0.7904761222,
353
  "ents_per_type":{
354
  "Contingent":{
355
- "p":0.8286840509,
356
- "r":0.7583240844,
357
- "f":0.791944364
358
  },
359
  "InformationExposition":{
360
- "p":0.8400248832,
361
- "r":0.8577633567,
362
- "f":0.8488014542
363
  },
364
  "AcademicTerms":{
365
- "p":0.7894026371,
366
- "r":0.8286671247,
367
- "f":0.8085584799
368
  },
369
  "ForceStressed":{
370
- "p":0.7898256456,
371
- "r":0.7841148116,
372
- "f":0.7869598681
373
  },
374
  "Character":{
375
- "p":0.8500444492,
376
- "r":0.8432579053,
377
- "f":0.8466375774
378
  },
379
  "Narrative":{
380
- "p":0.7737338187,
381
- "r":0.7965586415,
382
- "f":0.7849803462
383
  },
384
  "Strategic":{
385
- "p":0.7364217252,
386
- "r":0.6996483435,
387
- "f":0.7175642072
388
  },
389
  "MetadiscourseInteractive":{
390
- "p":0.8356010661,
391
- "r":0.6838480083,
392
- "f":0.7521464646
393
  },
394
  "Facilitate":{
395
- "p":0.7088353414,
396
- "r":0.6600373959,
397
- "f":0.6835665874
398
  },
399
  "Negative":{
400
- "p":0.7095155684,
401
- "r":0.682564777,
402
- "f":0.6957792879
403
  },
404
  "Interactive":{
405
- "p":0.839874111,
406
- "r":0.8383435157,
407
- "f":0.8391081154
408
  },
409
  "MetadiscourseCohesive":{
410
- "p":0.9192184725,
411
- "r":0.9288534712,
412
- "f":0.9240108556
413
  },
414
  "Description":{
415
- "p":0.7208328088,
416
- "r":0.7628920157,
417
- "f":0.7412662853
418
  },
419
  "PublicTerms":{
420
- "p":0.8309137834,
421
- "r":0.778781465,
422
- "f":0.8040034337
423
  },
424
  "Reasoning":{
425
- "p":0.8395115952,
426
- "r":0.7998493535,
427
- "f":0.8192006857
428
  },
429
  "Positive":{
430
- "p":0.7291883827,
431
- "r":0.6821606119,
432
- "f":0.7048909876
433
  },
434
  "Updates":{
435
- "p":0.7837246077,
436
- "r":0.7084891137,
437
- "f":0.7442102224
438
  },
439
  "InformationTopics":{
440
- "p":0.7918713029,
441
- "r":0.8200456611,
442
- "f":0.8057122551
443
  },
444
  "ConfidenceHigh":{
445
- "p":0.7615851193,
446
- "r":0.7704958272,
447
- "f":0.7660145605
448
  },
449
  "Citation":{
450
- "p":0.8037907101,
451
- "r":0.7775338928,
452
- "f":0.7904443132
453
  },
454
  "ConfidenceHedged":{
455
- "p":0.8374410068,
456
- "r":0.8770594696,
457
- "f":0.856792489
458
  },
459
  "InformationChange":{
460
- "p":0.7123025371,
461
- "r":0.7132585562,
462
- "f":0.7127802261
463
  },
464
  "InformationStates":{
465
- "p":0.8041805777,
466
- "r":0.8789930188,
467
- "f":0.8399241946
468
  },
469
  "FirstPerson":{
470
- "p":0.8766524888,
471
- "r":0.8892257463,
472
- "f":0.882894356
473
  },
474
  "Responsibility":{
475
- "p":0.7242662257,
476
- "r":0.6096033403,
477
- "f":0.6620064236
478
  },
479
  "Inquiry":{
480
- "p":0.6661799979,
481
- "r":0.6047524378,
482
- "f":0.6339817388
483
  },
484
  "InformationChangeNegative":{
485
- "p":0.6824946846,
486
- "r":0.5407074677,
487
- "f":0.6033834586
488
  },
489
  "ConfidenceLow":{
490
- "p":0.6462395543,
491
- "r":0.5510688836,
492
- "f":0.5948717949
493
  },
494
  "InformationPlace":{
495
- "p":0.865587996,
496
- "r":0.8944517093,
497
- "f":0.8797831779
498
- },
499
- "InformationReportVerbs":{
500
- "p":0.7508147746,
501
- "r":0.7912701252,
502
- "f":0.7705117932
503
  },
504
  "Future":{
505
- "p":0.756072781,
506
- "r":0.7410827449,
507
- "f":0.7485027202
508
  },
509
  "AcademicWritingMoves":{
510
- "p":0.7083700441,
511
- "r":0.4375510204,
512
- "f":0.5409587889
 
 
 
 
 
513
  },
514
  "Uncertainty":{
515
- "p":0.7549221275,
516
- "r":0.6497218007,
517
- "f":0.6983824929
518
  },
519
  "CitationHedged":{
520
- "p":0.7483221477,
521
- "r":0.9570815451,
522
- "f":0.8399246704
523
  },
524
  "CitationAuthority":{
525
- "p":0.7841151386,
526
- "r":0.5490854797,
527
- "f":0.6458836443
528
  },
529
  "InformationChangePositive":{
530
- "p":0.7640586797,
531
- "r":0.546169531,
532
- "f":0.6369967725
533
  }
534
  },
535
- "tok2vec_loss":180447.6574847773,
536
- "tagger_loss":24009.219291687,
537
- "ner_loss":59358.4819319265
538
  },
539
  "requirements":[
540
 
 
1
  {
2
  "lang":"en",
3
  "name":"docusco_spacy",
4
+ "version":"1.3",
5
  "description":"English pipeline for part-of-speech and rhetorical tagging.",
6
  "author":"David Brown",
7
  "email":"dwb2@andrew.cmu.edu",
 
346
 
347
  ],
348
  "performance":{
349
+ "tag_acc":0.9698599662,
350
+ "ents_f":0.7971954516,
351
+ "ents_p":0.798987704,
352
+ "ents_r":0.7954112218,
353
  "ents_per_type":{
354
  "Contingent":{
355
+ "p":0.818815331,
356
+ "r":0.782463929,
357
+ "f":0.8002270148
358
  },
359
  "InformationExposition":{
360
+ "p":0.8498392228,
361
+ "r":0.857557341,
362
+ "f":0.8536808374
363
  },
364
  "AcademicTerms":{
365
+ "p":0.8128432795,
366
+ "r":0.8176600252,
367
+ "f":0.8152445377
368
  },
369
  "ForceStressed":{
370
+ "p":0.8036501362,
371
+ "r":0.7950043821,
372
+ "f":0.7993038804
373
  },
374
  "Character":{
375
+ "p":0.8509934653,
376
+ "r":0.8578615428,
377
+ "f":0.8544137022
378
  },
379
  "Narrative":{
380
+ "p":0.7922994384,
381
+ "r":0.7865517992,
382
+ "f":0.789415157
383
  },
384
  "Strategic":{
385
+ "p":0.74291956,
386
+ "r":0.7049416991,
387
+ "f":0.7234325438
388
  },
389
  "MetadiscourseInteractive":{
390
+ "p":0.8243080626,
391
+ "r":0.7077258639,
392
+ "f":0.761581223
393
  },
394
  "Facilitate":{
395
+ "p":0.7420591457,
396
+ "r":0.6909739929,
397
+ "f":0.7156060206
398
  },
399
  "Negative":{
400
+ "p":0.7366169936,
401
+ "r":0.6818932229,
402
+ "f":0.7081995321
403
  },
404
  "Interactive":{
405
+ "p":0.8438560526,
406
+ "r":0.8501978617,
407
+ "f":0.8470150867
408
  },
409
  "MetadiscourseCohesive":{
410
+ "p":0.9307703425,
411
+ "r":0.9246894967,
412
+ "f":0.9277199553
413
  },
414
  "Description":{
415
+ "p":0.7184076094,
416
+ "r":0.7692427259,
417
+ "f":0.7429566137
418
  },
419
  "PublicTerms":{
420
+ "p":0.8250038862,
421
+ "r":0.8023660141,
422
+ "f":0.8135274957
423
  },
424
  "Reasoning":{
425
+ "p":0.8453436321,
426
+ "r":0.8060425995,
427
+ "f":0.8252254568
428
  },
429
  "Positive":{
430
+ "p":0.7428654449,
431
+ "r":0.6843594646,
432
+ "f":0.7124132921
433
  },
434
  "Updates":{
435
+ "p":0.7921472679,
436
+ "r":0.7476358038,
437
+ "f":0.7692481756
438
  },
439
  "InformationTopics":{
440
+ "p":0.7997236338,
441
+ "r":0.8110942833,
442
+ "f":0.8053688262
443
  },
444
  "ConfidenceHigh":{
445
+ "p":0.7693539348,
446
+ "r":0.7862870234,
447
+ "f":0.7777283211
448
  },
449
  "Citation":{
450
+ "p":0.8227242525,
451
+ "r":0.7993544222,
452
+ "f":0.8108709889
453
  },
454
  "ConfidenceHedged":{
455
+ "p":0.8354197349,
456
+ "r":0.8900831633,
457
+ "f":0.8618855884
458
  },
459
  "InformationChange":{
460
+ "p":0.7294003868,
461
+ "r":0.7230371009,
462
+ "f":0.7262048048
463
  },
464
  "InformationStates":{
465
+ "p":0.8306426735,
466
+ "r":0.8544531415,
467
+ "f":0.8423796861
468
  },
469
  "FirstPerson":{
470
+ "p":0.8819685753,
471
+ "r":0.9076103856,
472
+ "f":0.8946057773
473
  },
474
  "Responsibility":{
475
+ "p":0.7287275566,
476
+ "r":0.6496172582,
477
+ "f":0.6869021339
478
  },
479
  "Inquiry":{
480
+ "p":0.6458673322,
481
+ "r":0.6369402632,
482
+ "f":0.6413727359
483
  },
484
  "InformationChangeNegative":{
485
+ "p":0.738317757,
486
+ "r":0.532285233,
487
+ "f":0.6185970636
488
  },
489
  "ConfidenceLow":{
490
+ "p":0.8554216867,
491
+ "r":0.5059382423,
492
+ "f":0.6358208955
493
  },
494
  "InformationPlace":{
495
+ "p":0.8853535824,
496
+ "r":0.8875035024,
497
+ "f":0.8864272388
 
 
 
 
 
498
  },
499
  "Future":{
500
+ "p":0.7515119175,
501
+ "r":0.7649827992,
502
+ "f":0.758187528
503
  },
504
  "AcademicWritingMoves":{
505
+ "p":0.6664195701,
506
+ "r":0.4892517007,
507
+ "f":0.5642554527
508
+ },
509
+ "InformationReportVerbs":{
510
+ "p":0.7737441669,
511
+ "r":0.8067978533,
512
+ "f":0.7899253862
513
  },
514
  "Uncertainty":{
515
+ "p":0.7402862986,
516
+ "r":0.6408700051,
517
+ "f":0.6870001356
518
  },
519
  "CitationHedged":{
520
+ "p":0.7630662021,
521
+ "r":0.9399141631,
522
+ "f":0.8423076923
523
  },
524
  "CitationAuthority":{
525
+ "p":0.7866273353,
526
+ "r":0.5972377753,
527
+ "f":0.6789730533
528
  },
529
  "InformationChangePositive":{
530
+ "p":0.7317845829,
531
+ "r":0.605592776,
532
+ "f":0.6627350972
533
  }
534
  },
535
+ "tok2vec_loss":209248.4752924392,
536
+ "tagger_loss":13167.9055271149,
537
+ "ner_loss":58184.6998399578
538
  },
539
  "requirements":[
540
 
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54dd874f25444af84998a48f261e5cddd92e06d5fc7efb7b00f9933afaf47daa
3
  size 163912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7aba221a3d579504169f52262fd70ea8a121a9f4e1c6f1fd186ede35abfbe5fe
3
  size 163912
ner/moves CHANGED
@@ -1 +1 @@
1
- ��moves�
 
1
+ ��moves�
tagger/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b0ae90f765f0c0201596b5aab08d92d55f55f4afc45caa5355754d2b029502e
3
  size 105978
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:461db1e078111078173d225b54ffabd42ad232623f2dcf885ce021825852e07e
3
  size 105978
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c084fd57008cccf20a57ad9d78848ba1a041db525d076b662c0ad6186b9ad4c
3
  size 6009091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e68cf632c7b7e0720b53ee8c470444f0f69e01ec4044babcce83294c0e02d9d
3
  size 6009091
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c44723e3986900b1fa2c5008767f07ada9f3ac3a58a52c6bd57451fab44a894a
3
- size 6614948
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2397c5f890da06f450cc9102a3d46437e4927baf67e92c6e0b502c9add35a474
3
+ size 6614972