browndw committed on
Commit
208cbe9
1 Parent(s): fa7c54c

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -14,27 +14,27 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.8882993329
18
  - name: NER Recall
19
  type: recall
20
- value: 0.8832277388
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.8857562763
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
- value: 0.9838874397
31
  ---
32
  English pipeline for part-of-speech and rhetorical tagging.
33
 
34
  | Feature | Description |
35
  | --- | --- |
36
  | **Name** | `en_docusco_spacy_fc_trf` |
37
- | **Version** | `1.0` |
38
  | **spaCy** | `>=3.4.3,<3.5.0` |
39
  | **Default Pipeline** | `transformer`, `tagger`, `ner` |
40
  | **Components** | `transformer`, `tagger`, `ner` |
@@ -61,9 +61,9 @@ English pipeline for part-of-speech and rhetorical tagging.
61
  | Type | Score |
62
  | --- | --- |
63
  | `TAG_ACC` | 98.39 |
64
- | `ENTS_F` | 88.58 |
65
- | `ENTS_P` | 88.83 |
66
- | `ENTS_R` | 88.32 |
67
- | `TRANSFORMER_LOSS` | 2313939.76 |
68
- | `TAGGER_LOSS` | 652253.64 |
69
- | `NER_LOSS` | 2048381.71 |
 
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.889028963
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.8833963688
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.886203716
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
+ value: 0.9838746739
31
  ---
32
  English pipeline for part-of-speech and rhetorical tagging.
33
 
34
  | Feature | Description |
35
  | --- | --- |
36
  | **Name** | `en_docusco_spacy_fc_trf` |
37
+ | **Version** | `1.1` |
38
  | **spaCy** | `>=3.4.3,<3.5.0` |
39
  | **Default Pipeline** | `transformer`, `tagger`, `ner` |
40
  | **Components** | `transformer`, `tagger`, `ner` |
 
61
  | Type | Score |
62
  | --- | --- |
63
  | `TAG_ACC` | 98.39 |
64
+ | `ENTS_F` | 88.62 |
65
+ | `ENTS_P` | 88.90 |
66
+ | `ENTS_R` | 88.34 |
67
+ | `TRANSFORMER_LOSS` | 2319800.36 |
68
+ | `TAGGER_LOSS` | 669777.78 |
69
+ | `NER_LOSS` | 2048423.35 |
config.cfg CHANGED
@@ -1,6 +1,6 @@
1
  [paths]
2
- train = null
3
- dev = null
4
  vectors = null
5
  init_tok2vec = null
6
 
 
1
  [paths]
2
+ train = "/content/drive/MyDrive/DS Bert/SpacyTrain/spacy_train_cd.spacy"
3
+ dev = "/content/drive/MyDrive/DS Bert/SpacyTrain/spacy_test_cd.spacy"
4
  vectors = null
5
  init_tok2vec = null
6
 
en_docusco_spacy_fc_trf-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc1cb00ac5e7faaddfe860a1425b0764a8739bffbb0dc74b22ca784cbf686365
3
- size 464898091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db56eaa997e86c8da4c1e0ecb3cfbfe0f26eae97dc57fe04f6f7cbe8f1b37a14
3
+ size 464889500
meta.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "lang":"en",
3
  "name":"docusco_spacy_fc_trf",
4
- "version":"1.0",
5
  "description":"English pipeline for part-of-speech and rhetorical tagging.",
6
  "author":"David Brown",
7
  "email":"dwb2@andrew.cmu.edu",
@@ -307,110 +307,110 @@
307
 
308
  ],
309
  "performance":{
310
- "tag_acc":0.9838874397,
311
- "ents_f":0.8857562763,
312
- "ents_p":0.8882993329,
313
- "ents_r":0.8832277388,
314
  "ents_per_type":{
315
  "ActorsFirstPerson":{
316
- "p":0.9099651568,
317
- "r":0.9155798626,
318
- "f":0.9127638753
319
  },
320
  "ActorsAbstractions":{
321
- "p":0.8877091098,
322
- "r":0.8873245538,
323
- "f":0.8875167902
324
  },
325
  "SentimentPositive":{
326
- "p":0.8424859908,
327
- "r":0.8302208835,
328
- "f":0.8363084703
329
  },
330
  "ActorsPeople":{
331
- "p":0.9245164475,
332
- "r":0.9314975583,
333
- "f":0.9279938737
334
  },
335
  "SignpostingMetadiscourse":{
336
- "p":0.9420821114,
337
- "r":0.9216975493,
338
- "f":0.9317783552
339
  },
340
  "OrganizationReasoning":{
341
- "p":0.9065213002,
342
- "r":0.8960287368,
343
- "f":0.9012444801
344
  },
345
  "SentimentNegative":{
346
- "p":0.826066254,
347
- "r":0.8137773906,
348
- "f":0.8198757764
349
  },
350
  "OrganizationNarrative":{
351
- "p":0.8930481283,
352
- "r":0.8644067797,
353
- "f":0.8784940691
354
  },
355
  "ActorsPublicEntities":{
356
- "p":0.9016686532,
357
- "r":0.9000594884,
358
- "f":0.9008633522
359
  },
360
  "ConfidenceHedged":{
361
- "p":0.9001426534,
362
- "r":0.9029765312,
363
- "f":0.9015573653
364
  },
365
  "StanceEmphatic":{
366
- "p":0.8945487042,
367
- "r":0.9087607808,
368
- "f":0.901598739
369
  },
370
  "ConfidenceHigh":{
371
- "p":0.8826548067,
372
- "r":0.8637428858,
373
- "f":0.8730964467
374
  },
375
  "PlanningFuture":{
376
- "p":0.8868312757,
377
- "r":0.8990404673,
378
- "f":0.8928941371
379
  },
380
  "SignpostingAcademicWritingMoves":{
381
- "p":0.76987061,
382
- "r":0.7642201835,
383
- "f":0.7670349908
384
  },
385
  "PlanningStrategy":{
386
- "p":0.8410391898,
387
- "r":0.8340611354,
388
- "f":0.8375356282
389
  },
390
  "CitationAuthority":{
391
- "p":0.847715736,
392
- "r":0.8199672668,
393
- "f":0.8336106489
394
  },
395
  "StanceModerated":{
396
- "p":0.8598014888,
397
- "r":0.8828025478,
398
- "f":0.87115022
399
  },
400
  "CitationNeutral":{
401
- "p":0.8945945946,
402
- "r":0.888590604,
403
- "f":0.8915824916
404
  },
405
  "CitationControversy":{
406
- "p":0.8925619835,
407
- "r":0.8925619835,
408
- "f":0.8925619835
409
  }
410
  },
411
- "transformer_loss":23139.3976026688,
412
- "tagger_loss":6522.5364216973,
413
- "ner_loss":20483.8170848669
414
  },
415
  "requirements":[
416
  "spacy-transformers>=1.1.8,<1.2.0"
 
1
  {
2
  "lang":"en",
3
  "name":"docusco_spacy_fc_trf",
4
+ "version":"1.1",
5
  "description":"English pipeline for part-of-speech and rhetorical tagging.",
6
  "author":"David Brown",
7
  "email":"dwb2@andrew.cmu.edu",
 
307
 
308
  ],
309
  "performance":{
310
+ "tag_acc":0.9838746739,
311
+ "ents_f":0.886203716,
312
+ "ents_p":0.889028963,
313
+ "ents_r":0.8833963688,
314
  "ents_per_type":{
315
  "ActorsFirstPerson":{
316
+ "p":0.9048672566,
317
+ "r":0.9176833544,
318
+ "f":0.9112302444
319
  },
320
  "ActorsAbstractions":{
321
+ "p":0.8884982639,
322
+ "r":0.8868047132,
323
+ "f":0.8876506808
324
  },
325
  "SentimentPositive":{
326
+ "p":0.8560008306,
327
+ "r":0.827811245,
328
+ "f":0.8416700694
329
  },
330
  "ActorsPeople":{
331
+ "p":0.9271072667,
332
+ "r":0.9305028034,
333
+ "f":0.9288019317
334
  },
335
  "SignpostingMetadiscourse":{
336
+ "p":0.9420750336,
337
+ "r":0.9215780036,
338
+ "f":0.9317138023
339
  },
340
  "OrganizationReasoning":{
341
+ "p":0.9138317376,
342
+ "r":0.8952304929,
343
+ "f":0.9044354839
344
  },
345
  "SentimentNegative":{
346
+ "p":0.8280952381,
347
+ "r":0.8157206996,
348
+ "f":0.8218613915
349
  },
350
  "OrganizationNarrative":{
351
+ "p":0.8888659154,
352
+ "r":0.8726276261,
353
+ "f":0.8806719246
354
  },
355
  "ActorsPublicEntities":{
356
+ "p":0.913087316,
357
+ "r":0.8978782471,
358
+ "f":0.9054189162
359
  },
360
  "ConfidenceHedged":{
361
+ "p":0.9044895625,
362
+ "r":0.9052661706,
363
+ "f":0.9048776999
364
  },
365
  "StanceEmphatic":{
366
+ "p":0.864783265,
367
+ "r":0.9101225601,
368
+ "f":0.8868738251
369
  },
370
  "ConfidenceHigh":{
371
+ "p":0.8696095076,
372
+ "r":0.8573819886,
373
+ "f":0.8634524612
374
  },
375
  "PlanningFuture":{
376
+ "p":0.8828828829,
377
+ "r":0.8994576554,
378
+ "f":0.8910932011
379
  },
380
  "SignpostingAcademicWritingMoves":{
381
+ "p":0.7609090909,
382
+ "r":0.7678899083,
383
+ "f":0.7643835616
384
  },
385
  "PlanningStrategy":{
386
+ "p":0.8513819665,
387
+ "r":0.8205240175,
388
+ "f":0.8356682233
389
  },
390
  "CitationAuthority":{
391
+ "p":0.8544839255,
392
+ "r":0.8265139116,
393
+ "f":0.840266223
394
  },
395
  "StanceModerated":{
396
+ "p":0.8590852905,
397
+ "r":0.8853503185,
398
+ "f":0.8720200753
399
  },
400
  "CitationNeutral":{
401
+ "p":0.8832214765,
402
+ "r":0.8832214765,
403
+ "f":0.8832214765
404
  },
405
  "CitationControversy":{
406
+ "p":0.8739837398,
407
+ "r":0.8884297521,
408
+ "f":0.881147541
409
  }
410
  },
411
+ "transformer_loss":23198.0035903843,
412
+ "tagger_loss":6697.7777622218,
413
+ "ner_loss":20484.2334804777
414
  },
415
  "requirements":[
416
  "spacy-transformers>=1.1.8,<1.2.0"
ner/model CHANGED
Binary files a/ner/model and b/ner/model differ
 
ner/moves CHANGED
@@ -1 +1 @@
1
- ��moves�P{"0":{},"1":{"ActorsAbstractions":574525,"SentimentNegative":505726,"ActorsPeople":489704,"SentimentPositive":329499,"OrganizationNarrative":327796,"SignpostingMetadiscourse":285541,"ActorsFirstPerson":242622,"OrganizationReasoning":182971,"StanceEmphatic":145253,"ActorsPublicEntities":141386,"ConfidenceHedged":130515,"ConfidenceHigh":119839,"PlanningFuture":91199,"PlanningStrategy":77436,"SignpostingAcademicWritingMoves":45355,"CitationNeutral":28827,"StanceModerated":24981,"CitationAuthority":24697,"CitationControversy":7780},"2":{"ActorsAbstractions":574525,"SentimentNegative":505726,"ActorsPeople":489704,"SentimentPositive":329499,"OrganizationNarrative":327796,"SignpostingMetadiscourse":285541,"ActorsFirstPerson":242622,"OrganizationReasoning":182971,"StanceEmphatic":145253,"ActorsPublicEntities":141386,"ConfidenceHedged":130515,"ConfidenceHigh":119839,"PlanningFuture":91199,"PlanningStrategy":77436,"SignpostingAcademicWritingMoves":45355,"CitationNeutral":28827,"StanceModerated":24981,"CitationAuthority":24697,"CitationControversy":7780},"3":{"ActorsAbstractions":574525,"SentimentNegative":505726,"ActorsPeople":489704,"SentimentPositive":329499,"OrganizationNarrative":327796,"SignpostingMetadiscourse":285541,"ActorsFirstPerson":242622,"OrganizationReasoning":182971,"StanceEmphatic":145253,"ActorsPublicEntities":141386,"ConfidenceHedged":130515,"ConfidenceHigh":119839,"PlanningFuture":91199,"PlanningStrategy":77436,"SignpostingAcademicWritingMoves":45355,"CitationNeutral":28827,"StanceModerated":24981,"CitationAuthority":24697,"CitationControversy":7780},"4":{"ActorsAbstractions":574525,"SentimentNegative":505726,"ActorsPeople":489704,"SentimentPositive":329499,"OrganizationNarrative":327796,"SignpostingMetadiscourse":285541,"ActorsFirstPerson":242622,"OrganizationReasoning":182971,"StanceEmphatic":145253,"ActorsPublicEntities":141386,"ConfidenceHedged":130515,"ConfidenceHigh":119839,"PlanningFuture":91199,"PlanningStrategy":77436,"SignpostingAcademi
cWritingMoves":45355,"CitationNeutral":28827,"StanceModerated":24981,"CitationAuthority":24697,"CitationControversy":7780,"":1},"5":{"":1}}�cfg��neg_key�
 
1
+ ��moves�P{"0":{},"1":{"ActorsAbstractions":574627,"SentimentNegative":505726,"ActorsPeople":489704,"SentimentPositive":329499,"OrganizationNarrative":327796,"SignpostingMetadiscourse":285541,"ActorsFirstPerson":242622,"OrganizationReasoning":182971,"StanceEmphatic":148905,"ActorsPublicEntities":141386,"ConfidenceHedged":130515,"ConfidenceHigh":119696,"PlanningFuture":91199,"PlanningStrategy":77436,"SignpostingAcademicWritingMoves":45355,"CitationNeutral":28827,"StanceModerated":24981,"CitationAuthority":24697,"CitationControversy":7780},"2":{"ActorsAbstractions":574627,"SentimentNegative":505726,"ActorsPeople":489704,"SentimentPositive":329499,"OrganizationNarrative":327796,"SignpostingMetadiscourse":285541,"ActorsFirstPerson":242622,"OrganizationReasoning":182971,"StanceEmphatic":148905,"ActorsPublicEntities":141386,"ConfidenceHedged":130515,"ConfidenceHigh":119696,"PlanningFuture":91199,"PlanningStrategy":77436,"SignpostingAcademicWritingMoves":45355,"CitationNeutral":28827,"StanceModerated":24981,"CitationAuthority":24697,"CitationControversy":7780},"3":{"ActorsAbstractions":574627,"SentimentNegative":505726,"ActorsPeople":489704,"SentimentPositive":329499,"OrganizationNarrative":327796,"SignpostingMetadiscourse":285541,"ActorsFirstPerson":242622,"OrganizationReasoning":182971,"StanceEmphatic":148905,"ActorsPublicEntities":141386,"ConfidenceHedged":130515,"ConfidenceHigh":119696,"PlanningFuture":91199,"PlanningStrategy":77436,"SignpostingAcademicWritingMoves":45355,"CitationNeutral":28827,"StanceModerated":24981,"CitationAuthority":24697,"CitationControversy":7780},"4":{"ActorsAbstractions":574627,"SentimentNegative":505726,"ActorsPeople":489704,"SentimentPositive":329499,"OrganizationNarrative":327796,"SignpostingMetadiscourse":285541,"ActorsFirstPerson":242622,"OrganizationReasoning":182971,"StanceEmphatic":148905,"ActorsPublicEntities":141386,"ConfidenceHedged":130515,"ConfidenceHigh":119696,"PlanningFuture":91199,"PlanningStrategy":77436,"SignpostingAcademi
cWritingMoves":45355,"CitationNeutral":28827,"StanceModerated":24981,"CitationAuthority":24697,"CitationControversy":7780,"":1},"5":{"":1}}�cfg��neg_key�
tagger/model CHANGED
Binary files a/tagger/model and b/tagger/model differ
 
transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7aaa4b7927348740b8700c2178d2e6d129e0afcbf9f52c98bfdd039442809b6
3
  size 502030632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22576948b84086ef0634f91f089cb600a12dcf97e5c37e27caf9ddf1d2cebfb8
3
  size 502030632