browndw committed on
Commit
22ab756
1 Parent(s): e0ba4d8

Update spaCy pipeline

Browse files
Files changed (8) hide show
  1. README.md +11 -11
  2. config.cfg +2 -2
  3. en_docusco_spacy-any-py3-none-any.whl +2 -2
  4. meta.json +123 -123
  5. ner/model +1 -1
  6. ner/moves +1 -1
  7. tagger/model +1 -1
  8. tok2vec/model +1 -1
README.md CHANGED
@@ -14,27 +14,27 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.7905337091
18
  - name: NER Recall
19
  type: recall
20
- value: 0.7900620784
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.7902978234
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
- value: 0.9421614376
31
  ---
32
  English pipeline for part-of-speech and rhetorical tagging.
33
 
34
  | Feature | Description |
35
  | --- | --- |
36
  | **Name** | `en_docusco_spacy` |
37
- | **Version** | `1.1` |
38
  | **spaCy** | `>=3.5.0,<3.6.0` |
39
  | **Default Pipeline** | `tok2vec`, `tagger`, `ner` |
40
  | **Components** | `tok2vec`, `tagger`, `ner` |
@@ -61,9 +61,9 @@ English pipeline for part-of-speech and rhetorical tagging.
61
  | Type | Score |
62
  | --- | --- |
63
  | `TAG_ACC` | 94.22 |
64
- | `ENTS_F` | 79.03 |
65
- | `ENTS_P` | 79.05 |
66
- | `ENTS_R` | 79.01 |
67
- | `TOK2VEC_LOSS` | 17939385.03 |
68
- | `TAGGER_LOSS` | 2398027.79 |
69
- | `NER_LOSS` | 5987358.43 |
 
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.7897948078
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.7904761222
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.7901353181
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
+ value: 0.9422182346
31
  ---
32
  English pipeline for part-of-speech and rhetorical tagging.
33
 
34
  | Feature | Description |
35
  | --- | --- |
36
  | **Name** | `en_docusco_spacy` |
37
+ | **Version** | `1.2` |
38
  | **spaCy** | `>=3.5.0,<3.6.0` |
39
  | **Default Pipeline** | `tok2vec`, `tagger`, `ner` |
40
  | **Components** | `tok2vec`, `tagger`, `ner` |
 
61
  | Type | Score |
62
  | --- | --- |
63
  | `TAG_ACC` | 94.22 |
64
+ | `ENTS_F` | 79.01 |
65
+ | `ENTS_P` | 78.98 |
66
+ | `ENTS_R` | 79.05 |
67
+ | `TOK2VEC_LOSS` | 18044765.75 |
68
+ | `TAGGER_LOSS` | 2400921.93 |
69
+ | `NER_LOSS` | 5935848.19 |
config.cfg CHANGED
@@ -1,6 +1,6 @@
1
  [paths]
2
- train = ""
3
- dev = ""
4
  vectors = null
5
  init_tok2vec = null
6
 
 
1
  [paths]
2
+ train = "spacy_train_fd.spacy"
3
+ dev = "spacy_test_fd.spacy"
4
  vectors = null
5
  init_tok2vec = null
6
 
en_docusco_spacy-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80c2d7b616cb187c038261e93ca7996e3e5b67adfb6fc1a81d26b8700c5b4c8c
3
- size 7501766
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d73aad65d669783246161c1be36f0be9001ee88c4b1fddce57311c8a8bc5030
3
+ size 7502026
meta.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "lang":"en",
3
  "name":"docusco_spacy",
4
- "version":"1.1",
5
  "description":"English pipeline for part-of-speech and rhetorical tagging.",
6
  "author":"David Brown",
7
  "email":"dwb2@andrew.cmu.edu",
@@ -346,195 +346,195 @@
346
 
347
  ],
348
  "performance":{
349
- "tag_acc":0.9421614376,
350
- "ents_f":0.7902978234,
351
- "ents_p":0.7905337091,
352
- "ents_r":0.7900620784,
353
  "ents_per_type":{
354
  "Contingent":{
355
- "p":0.8194101982,
356
- "r":0.7684054754,
357
- "f":0.7930886354
358
  },
359
  "InformationExposition":{
360
- "p":0.8408782638,
361
- "r":0.8514455432,
362
- "f":0.8461289112
363
  },
364
  "AcademicTerms":{
365
- "p":0.7917226257,
366
- "r":0.8269208748,
367
- "f":0.8089390481
368
  },
369
  "ForceStressed":{
370
- "p":0.7889513438,
371
- "r":0.7872918493,
372
- "f":0.7881207229
373
  },
374
  "Character":{
375
- "p":0.8428886221,
376
- "r":0.8535599516,
377
- "f":0.8481907234
378
  },
379
  "Narrative":{
380
- "p":0.7941775374,
381
- "r":0.7763194725,
382
- "f":0.7851469732
383
  },
384
  "Strategic":{
385
- "p":0.7283594711,
386
- "r":0.7014251342,
387
- "f":0.7146386076
388
  },
389
  "MetadiscourseInteractive":{
390
- "p":0.8314383172,
391
- "r":0.6715646883,
392
- "f":0.7429986664
393
- },
394
- "Negative":{
395
- "p":0.7079502122,
396
- "r":0.6883289496,
397
- "f":0.6980017167
398
  },
399
  "Facilitate":{
400
- "p":0.7118275675,
401
- "r":0.6680265171,
402
- "f":0.6892318485
 
 
 
 
 
403
  },
404
  "Interactive":{
405
- "p":0.8448483369,
406
- "r":0.8406692585,
407
- "f":0.8427536169
408
  },
409
  "MetadiscourseCohesive":{
410
- "p":0.9115467032,
411
- "r":0.9314739034,
412
- "f":0.9214025743
413
  },
414
  "Description":{
415
- "p":0.718241746,
416
- "r":0.762914221,
417
- "f":0.7399043103
418
  },
419
  "PublicTerms":{
420
- "p":0.8216885583,
421
- "r":0.7849799683,
422
- "f":0.8029149109
423
  },
424
  "Reasoning":{
425
- "p":0.8353065446,
426
- "r":0.7942001088,
427
- "f":0.8142348449
428
  },
429
  "Positive":{
430
- "p":0.7350608922,
431
- "r":0.6682026769,
432
- "f":0.7000390613
433
  },
434
  "Updates":{
435
- "p":0.7852616757,
436
- "r":0.7358698043,
437
- "f":0.759763851
438
  },
439
  "InformationTopics":{
440
- "p":0.8050484915,
441
- "r":0.8124731572,
442
- "f":0.8087437842
443
  },
444
  "ConfidenceHigh":{
445
- "p":0.729696785,
446
- "r":0.783668794,
447
- "f":0.7557203724
448
  },
449
  "Citation":{
450
- "p":0.7883380426,
451
- "r":0.7925112976,
452
- "f":0.7904191617
453
  },
454
- "Uncertainty":{
455
- "p":0.7364028777,
456
- "r":0.6471927162,
457
- "f":0.688921793
458
  },
459
  "InformationChange":{
460
- "p":0.715477206,
461
- "r":0.7237081775,
462
- "f":0.7195691545
463
- },
464
- "InformationReportVerbs":{
465
- "p":0.7432557524,
466
- "r":0.8043649374,
467
- "f":0.7726038695
468
  },
469
  "InformationStates":{
470
- "p":0.7881446908,
471
- "r":0.8930611381,
472
- "f":0.8373292341
473
- },
474
- "ConfidenceHedged":{
475
- "p":0.8401550734,
476
- "r":0.8841205084,
477
- "f":0.8615772774
478
  },
479
  "FirstPerson":{
480
- "p":0.8702371032,
481
- "r":0.8930348259,
482
- "f":0.8814885862
483
  },
484
  "Responsibility":{
485
- "p":0.7245780156,
486
- "r":0.6123869172,
487
- "f":0.6637752216
488
  },
489
  "Inquiry":{
490
- "p":0.672815534,
491
- "r":0.5904572565,
492
- "f":0.6289517471
493
  },
494
  "InformationChangeNegative":{
495
- "p":0.6999255398,
496
- "r":0.5277933745,
497
- "f":0.6017925736
498
  },
499
  "ConfidenceLow":{
500
- "p":0.796812749,
501
- "r":0.4750593824,
502
- "f":0.5952380952
503
  },
504
  "InformationPlace":{
505
- "p":0.8673478574,
506
- "r":0.8999579655,
507
- "f":0.8833520526
 
 
 
 
 
508
  },
509
  "Future":{
510
- "p":0.7551444043,
511
- "r":0.757468767,
512
- "f":0.7563047998
513
  },
514
  "AcademicWritingMoves":{
515
- "p":0.7072072072,
516
- "r":0.4272108844,
517
- "f":0.5326547922
 
 
 
 
 
518
  },
519
  "CitationHedged":{
520
- "p":0.7910447761,
521
- "r":0.9098712446,
522
- "f":0.8463073852
523
  },
524
  "CitationAuthority":{
525
- "p":0.7478653943,
526
- "r":0.5558044046,
527
- "f":0.6376873662
528
  },
529
  "InformationChangePositive":{
530
- "p":0.7891513561,
531
- "r":0.5254879114,
532
- "f":0.6308795244
533
  }
534
  },
535
- "tok2vec_loss":179393.8503061496,
536
- "tagger_loss":23980.277885437,
537
- "ner_loss":59873.5843254536
538
  },
539
  "requirements":[
540
 
 
1
  {
2
  "lang":"en",
3
  "name":"docusco_spacy",
4
+ "version":"1.2",
5
  "description":"English pipeline for part-of-speech and rhetorical tagging.",
6
  "author":"David Brown",
7
  "email":"dwb2@andrew.cmu.edu",
 
346
 
347
  ],
348
  "performance":{
349
+ "tag_acc":0.9422182346,
350
+ "ents_f":0.7901353181,
351
+ "ents_p":0.7897948078,
352
+ "ents_r":0.7904761222,
353
  "ents_per_type":{
354
  "Contingent":{
355
+ "p":0.8286840509,
356
+ "r":0.7583240844,
357
+ "f":0.791944364
358
  },
359
  "InformationExposition":{
360
+ "p":0.8400248832,
361
+ "r":0.8577633567,
362
+ "f":0.8488014542
363
  },
364
  "AcademicTerms":{
365
+ "p":0.7894026371,
366
+ "r":0.8286671247,
367
+ "f":0.8085584799
368
  },
369
  "ForceStressed":{
370
+ "p":0.7898256456,
371
+ "r":0.7841148116,
372
+ "f":0.7869598681
373
  },
374
  "Character":{
375
+ "p":0.8500444492,
376
+ "r":0.8432579053,
377
+ "f":0.8466375774
378
  },
379
  "Narrative":{
380
+ "p":0.7737338187,
381
+ "r":0.7965586415,
382
+ "f":0.7849803462
383
  },
384
  "Strategic":{
385
+ "p":0.7364217252,
386
+ "r":0.6996483435,
387
+ "f":0.7175642072
388
  },
389
  "MetadiscourseInteractive":{
390
+ "p":0.8356010661,
391
+ "r":0.6838480083,
392
+ "f":0.7521464646
 
 
 
 
 
393
  },
394
  "Facilitate":{
395
+ "p":0.7088353414,
396
+ "r":0.6600373959,
397
+ "f":0.6835665874
398
+ },
399
+ "Negative":{
400
+ "p":0.7095155684,
401
+ "r":0.682564777,
402
+ "f":0.6957792879
403
  },
404
  "Interactive":{
405
+ "p":0.839874111,
406
+ "r":0.8383435157,
407
+ "f":0.8391081154
408
  },
409
  "MetadiscourseCohesive":{
410
+ "p":0.9192184725,
411
+ "r":0.9288534712,
412
+ "f":0.9240108556
413
  },
414
  "Description":{
415
+ "p":0.7208328088,
416
+ "r":0.7628920157,
417
+ "f":0.7412662853
418
  },
419
  "PublicTerms":{
420
+ "p":0.8309137834,
421
+ "r":0.778781465,
422
+ "f":0.8040034337
423
  },
424
  "Reasoning":{
425
+ "p":0.8395115952,
426
+ "r":0.7998493535,
427
+ "f":0.8192006857
428
  },
429
  "Positive":{
430
+ "p":0.7291883827,
431
+ "r":0.6821606119,
432
+ "f":0.7048909876
433
  },
434
  "Updates":{
435
+ "p":0.7837246077,
436
+ "r":0.7084891137,
437
+ "f":0.7442102224
438
  },
439
  "InformationTopics":{
440
+ "p":0.7918713029,
441
+ "r":0.8200456611,
442
+ "f":0.8057122551
443
  },
444
  "ConfidenceHigh":{
445
+ "p":0.7615851193,
446
+ "r":0.7704958272,
447
+ "f":0.7660145605
448
  },
449
  "Citation":{
450
+ "p":0.8037907101,
451
+ "r":0.7775338928,
452
+ "f":0.7904443132
453
  },
454
+ "ConfidenceHedged":{
455
+ "p":0.8374410068,
456
+ "r":0.8770594696,
457
+ "f":0.856792489
458
  },
459
  "InformationChange":{
460
+ "p":0.7123025371,
461
+ "r":0.7132585562,
462
+ "f":0.7127802261
 
 
 
 
 
463
  },
464
  "InformationStates":{
465
+ "p":0.8041805777,
466
+ "r":0.8789930188,
467
+ "f":0.8399241946
 
 
 
 
 
468
  },
469
  "FirstPerson":{
470
+ "p":0.8766524888,
471
+ "r":0.8892257463,
472
+ "f":0.882894356
473
  },
474
  "Responsibility":{
475
+ "p":0.7242662257,
476
+ "r":0.6096033403,
477
+ "f":0.6620064236
478
  },
479
  "Inquiry":{
480
+ "p":0.6661799979,
481
+ "r":0.6047524378,
482
+ "f":0.6339817388
483
  },
484
  "InformationChangeNegative":{
485
+ "p":0.6824946846,
486
+ "r":0.5407074677,
487
+ "f":0.6033834586
488
  },
489
  "ConfidenceLow":{
490
+ "p":0.6462395543,
491
+ "r":0.5510688836,
492
+ "f":0.5948717949
493
  },
494
  "InformationPlace":{
495
+ "p":0.865587996,
496
+ "r":0.8944517093,
497
+ "f":0.8797831779
498
+ },
499
+ "InformationReportVerbs":{
500
+ "p":0.7508147746,
501
+ "r":0.7912701252,
502
+ "f":0.7705117932
503
  },
504
  "Future":{
505
+ "p":0.756072781,
506
+ "r":0.7410827449,
507
+ "f":0.7485027202
508
  },
509
  "AcademicWritingMoves":{
510
+ "p":0.7083700441,
511
+ "r":0.4375510204,
512
+ "f":0.5409587889
513
+ },
514
+ "Uncertainty":{
515
+ "p":0.7549221275,
516
+ "r":0.6497218007,
517
+ "f":0.6983824929
518
  },
519
  "CitationHedged":{
520
+ "p":0.7483221477,
521
+ "r":0.9570815451,
522
+ "f":0.8399246704
523
  },
524
  "CitationAuthority":{
525
+ "p":0.7841151386,
526
+ "r":0.5490854797,
527
+ "f":0.6458836443
528
  },
529
  "InformationChangePositive":{
530
+ "p":0.7640586797,
531
+ "r":0.546169531,
532
+ "f":0.6369967725
533
  }
534
  },
535
+ "tok2vec_loss":180447.6574847773,
536
+ "tagger_loss":24009.219291687,
537
+ "ner_loss":59358.4819319265
538
  },
539
  "requirements":[
540
 
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cc11bb5c791bd15c71cccc36896b4851429cd7a428da01ddbebdee8a30c31b0
3
  size 163912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54dd874f25444af84998a48f261e5cddd92e06d5fc7efb7b00f9933afaf47daa
3
  size 163912
ner/moves CHANGED
@@ -1 +1 @@
1
- ��moves�
 
1
+ ��moves�
tagger/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1983965d07e1d71cc442defe9818cff84461b64be4d239558dabed5a3dffeeee
3
  size 105978
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b0ae90f765f0c0201596b5aab08d92d55f55f4afc45caa5355754d2b029502e
3
  size 105978
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05e2cfc9fcdbf023345f8cd6c80f291f13961dd7b5b233b5793d5b4754a0ac74
3
  size 6009091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c084fd57008cccf20a57ad9d78848ba1a041db525d076b662c0ad6186b9ad4c
3
  size 6009091