NER_conllpp / ctfidf_config.json
wizardofchance's picture
Add BERTopic model
25f9d37 verified
{
"ctfidf_model": {
"bm25_weighting": false,
"reduce_frequent_words": false
},
"vectorizer_model": {
"params": {
"analyzer": "word",
"binary": false,
"decode_error": "strict",
"encoding": "utf-8",
"input": "content",
"lowercase": true,
"max_df": 1.0,
"max_features": null,
"min_df": 1,
"ngram_range": [
1,
1
],
"stop_words": "english",
"strip_accents": null,
"token_pattern": "(?u)\\b\\w\\w+\\b",
"vocabulary": null
},
"vocab": {
"united": 805,
"nations": 506,
"marking": 468,
"seventieth": 704,
"anniversary": 62,
"year": 840,
"making": 461,
"session": 702,
"general": 336,
"assembly": 81,
"historic": 363,
"hope": 370,
"terms": 759,
"outcomes": 541,
"like": 441,
"assure": 85,
"president": 594,
"receive": 641,
"india": 393,
"support": 746,
"efforts": 265,
"seventy": 705,
"years": 841,
"ago": 51,
"foundations": 327,
"laid": 424,
"san": 696,
"francisco": 329,
"conference": 159,
"city": 136,
"west": 830,
"coast": 142,
"country": 193,
"signatory": 710,
"countries": 192,
"charter": 131,
"time": 770,
"independent": 392,
"obtained": 527,
"independence": 391,
"later": 427,
"established": 292,
"diminutive": 236,
"looking": 451,
"man": 463,
"powerful": 586,
"weapon": 828,
"non": 521,
"violence": 818,
"writing": 839,
"final": 314,
"act": 36,
"struggle": 739,
"symbol": 750,
"colonized": 145,
"oppressed": 537,
"grateful": 345,
"proclaimed": 607,
"birthday": 104,
"extraordinary": 304,
"international": 404,
"day": 208,
"pleasing": 577,
"coincidence": 143,
"tomorrow": 774,
"october": 531,
"and42": 61,
"55": 27,
"15": 4,
"29658": 21,
"01": 1,
"10": 2,
"2015": 12,
"70": 30,
"pv": 626,
"22that": 19,
"celebrating": 124,
"mahatma": 456,
"gandhi": 334,
"said": 694,
"difference": 233,
"capable": 118,
"doing": 249,
"suffice": 744,
"solve": 724,
"world": 838,
"problems": 604,
"message": 482,
"fitting": 320,
"context": 174,
"today": 772,
"ravaged": 634,
"war": 825,
"continents": 175,
"security": 698,
"council": 189,
"powerless": 587,
"unwilling": 807,
"stem": 736,
"low": 454,
"blood": 105,
"traditional": 779,
"solutions": 723,
"rely": 661,
"force": 323,
"exacerbated": 295,
"ask": 78,
"political": 584,
"craft": 198,
"alternatives": 55,
"conflict": 160,
"pursue": 625,
"commitment": 148,
"single": 714,
"minded": 490,
"dedication": 215,
"goal": 341,
"important": 384,
"peacekeeping": 565,
"blue": 106,
"flag": 321,
"men": 480,
"women": 836,
"constantly": 167,
"working": 837,
"prevent": 596,
"protect": 617,
"civilians": 137,
"sustain": 747,
"peace": 562,
"processes": 606,
"180": 6,
"000": 0,
"peacekeepers": 564,
"deployed": 226,
"far": 310,
"largest": 426,
"contributor": 182,
"provided": 620,
"indian": 394,
"military": 487,
"police": 581,
"personnel": 570,
"participating": 554,
"missions": 495,
"operating": 534,
"highly": 362,
"challenging": 129,
"environments": 284,
"remains": 663,
"committed": 150,
"continuing": 178,
"operations": 535,
"enhancing": 279,
"contributions": 181,
"announced": 63,
"prime": 600,
"minister": 491,
"leaders": 429,
"summit": 745,
"new": 518,
"cover": 197,
"aspects": 80,
"enablers": 274,
"training": 783,
"dilution": 235,
"cardinal": 121,
"principles": 602,
"occur": 530,
"fact": 308,
"troop": 791,
"contributing": 180,
"role": 687,
"formulation": 325,
"mandates": 465,
"amended": 57,
"consultation": 170,
"matter": 470,
"concern": 156,
"clear": 139,
"violation": 817,
"article": 75,
"44": 24,
"believe": 101,
"substitute": 741,
"high": 361,
"level": 437,
"panel": 550,
"underscored": 800,
"mark": 467,
"opportunity": 536,
"pay": 561,
"tribute": 790,
"300": 22,
"including": 389,
"161": 5,
"ultimate": 796,
"sacrifice": 691,
"stand": 730,
"ready": 638,
"contribute": 179,
"memorial": 479,
"wall": 822,
"approved": 69,
"ninth": 520,
"safety": 693,
"future": 333,
"community": 152,
"depend": 225,
"respond": 672,
"greatest": 346,
"threat": 767,
"face": 307,
"terrorism": 761,
"lived": 446,
"quarter": 628,
"century": 126,
"tragically": 782,
"brought": 112,
"home": 368,
"autumn": 89,
"2001": 10,
"adequately": 41,
"countered": 191,
"proliferation": 610,
"terrorist": 762,
"acts": 38,
"rise": 685,
"extremist": 306,
"ideologies": 380,
"impunity": 386,
"states": 735,
"organized": 539,
"action": 37,
"defeat": 217,
"demonstrate": 224,
"zero": 846,
"tolerance": 773,
"terrorists": 763,
"kill": 419,
"maim": 457,
"innocent": 399,
"base": 97,
"principle": 601,
"prosecute": 614,
"extradite": 303,
"make": 460,
"provide": 619,
"financing": 317,
"safe": 692,
"havens": 355,
"arming": 73,
"heavy": 357,
"price": 599,
"equally": 288,
"establishment": 293,
"legal": 432,
"regime": 655,
"comprehensive": 154,
"convention": 184,
"longer": 449,
"held": 358,
"nineteen": 519,
"1996": 9,
"proposed": 613,
"member": 477,
"unable": 797,
"adopt": 42,
"mired": 494,
"issue": 411,
"definition": 218,
"understand": 801,
"distinction": 247,
"exists": 298,
"good": 343,
"bad": 94,
"linked15": 444,
"43": 23,
"22": 17,
"2015to": 14,
"religion": 660,
"commits": 149,
"crimes": 202,
"humanity": 376,
"appeal": 66,
"come": 147,
"pledge": 578,
"unanimously": 799,
"subject": 740,
"share": 707,
"challenges": 128,
"ties": 769,
"pakistan": 548,
"accept": 32,
"legitimate": 434,
"instrument": 403,
"statecraft": 733,
"shared": 708,
"outrage": 543,
"2008": 11,
"mumbai": 502,
"terror": 760,
"attacks": 87,
"citizens": 135,
"helplessly": 360,
"butchered": 116,
"mastermind": 469,
"attack": 86,
"walking": 821,
"free": 330,
"affront": 47,
"entire": 282,
"past": 559,
"assurances": 84,
"regard": 653,
"honoured": 369,
"cross": 206,
"border": 109,
"taken": 753,
"place": 571,
"recently": 643,
"captured": 120,
"alive": 54,
"know": 420,
"meant": 473,
"destabilize": 228,
"legitimize": 435,
"illegal": 381,
"occupation": 529,
"parts": 558,
"state": 732,
"jammu": 413,
"kashmir": 417,
"claim": 138,
"rest": 677,
"let": 436,
"use": 810,
"occasion": 528,
"spell": 727,
"approach": 68,
"clearly": 140,
"open": 533,
"dialogue": 232,
"finds": 318,
"talks": 756,
"incompatible": 390,
"yesterday": 843,
"termed": 758,
"point": 579,
"initiative": 398,
"need": 510,
"points": 580,
"just": 416,
"sit": 715,
"talk": 755,
"precisely": 589,
"discussed": 242,
"decided": 212,
"ministers": 493,
"ufa": 795,
"july": 415,
"hold": 366,
"national": 504,
"advisers": 44,
"issues": 412,
"connected": 164,
"arrange": 74,
"early": 253,
"meeting": 476,
"directors": 237,
"address": 40,
"situation": 716,
"response": 673,
"credible": 201,
"prepared": 592,
"outstanding": 544,
"bilateral": 102,
"counter": 190,
"menace": 481,
"acknowledge": 35,
"real": 639,
"social": 720,
"economic": 259,
"progress": 609,
"critical": 205,
"elimination": 267,
"basic": 99,
"human": 374,
"want": 823,
"leads": 431,
"invariably": 405,
"peaceful": 563,
"societies": 721,
"evidenced": 294,
"map": 466,
"conflicts": 161,
"engulf": 278,
"days": 209,
"adopted": 43,
"2030": 16,
"agenda": 50,
"sustainable": 748,
"development": 231,
"resolution": 669,
"goals": 342,
"core": 187,
"constitute": 168,
"blueprint": 107,
"holistic": 367,
"millennium": 488,
"successful": 743,
"implementation": 383,
"require": 668,
"resolve": 670,
"inclination": 387,
"toshare": 777,
"technology": 757,
"financial": 316,
"resources": 671,
"simultaneously": 713,
"improving": 385,
"delivery": 221,
"mechanisms": 474,
"woman": 835,
"elected": 266,
"parliament": 553,
"firm": 319,
"conviction": 185,
"shortcut": 709,
"change": 130,
"empowering": 272,
"girl": 338,
"child": 132,
"government": 344,
"policy": 582,
"programme": 608,
"educate": 262,
"empower": 271,
"conceived": 155,
"vision": 819,
"rests": 679,
"building": 113,
"planet": 573,
"children": 133,
"prepare": 591,
"meet": 475,
"months": 499,
"paris": 552,
"expects": 301,
"deliver": 219,
"ambitious": 56,
"agreement": 52,
"climate": 141,
"duty": 252,
"common": 151,
"mind": 489,
"larger": 425,
"historical": 364,
"differentiated": 234,
"responsibilities": 674,
"used": 811,
"needs": 512,
"greed": 347,
"adapting": 39,
"lifestyle": 440,
"choices": 134,
"reducing": 647,
"extravagant": 305,
"consumption": 171,
"help": 359,
"correct": 188,
"course": 195,
"reason": 640,
"equitable": 290,
"delivering": 220,
"concrete": 157,
"developing": 230,
"enabled": 273,
"developed": 229,
"areas": 72,
"finance": 315,
"transfer": 784,
"capacity": 119,
"willing": 831,
"partner": 555,
"global": 340,
"play": 575,
"reaching": 637,
"meaningful": 472,
"effective": 263,
"reverence": 681,
"environment": 283,
"based": 98,
"traditions": 780,
"nature": 508,
"sacred": 690,
"health": 356,
"tied": 768,
"guided": 350,
"yoga": 844,
"celebrated": 123,
"fervour": 313,
"192": 7,
"hallowed": 353,
"institution": 401,
"thank": 765,
"enthusiasm": 280,
"seed": 699,
"planted": 574,
"grow": 348,
"magnificent": 455,
"tree": 789,
"congratulate": 163,
"prompt": 612,
"tackling": 752,
"epidemics": 286,
"linking": 445,
"managed": 464,
"eradicate": 291,
"smallpox": 719,
"nearly": 509,
"polio": 583,
"hiv": 365,
"aids": 53,
"controlled": 183,
"ebola": 258,
"outbreak": 540,
"contained": 172,
"unacceptably": 798,
"lives": 447,
"paid": 547,
"epidemic": 285,
"example": 296,
"crisis": 204,
"proud": 618,
"that44": 766,
"22india": 18,
"played": 576,
"inspired": 400,
"successes": 742,
"redouble": 645,
"diseases": 245,
"tuberculosis": 794,
"malaria": 462,
"natural": 507,
"disasters": 240,
"deserve": 227,
"coordinated": 186,
"recent": 642,
"wanting": 824,
"refugee": 652,
"caused": 122,
"situations": 717,
"syria": 751,
"iraq": 408,
"libya": 438,
"needed": 511,
"swift": 749,
"backed": 93,
"humanitarian": 375,
"crises": 203,
"neighbourhood": 515,
"quick": 631,
"responsive": 676,
"nepal": 516,
"yemen": 842,
"emerged": 269,
"net": 517,
"provider": 621,
"assisting": 82,
"nationals": 505,
"sought": 725,
"hosting": 372,
"asian": 77,
"ministerial": 492,
"disaster": 239,
"risk": 686,
"reduction": 648,
"november": 525,
"2016": 15,
"topic": 776,
"sendai": 701,
"framework": 328,
"walked": 820,
"long": 448,
"way": 826,
"25": 20,
"billion": 103,
"countrymen": 194,
"attest": 88,
"happy": 354,
"knowledge": 421,
"democratic": 223,
"deeply": 216,
"embedded": 268,
"nonetheless": 522,
"transformative": 786,
"reinvigorate": 657,
"nation": 503,
"redeem": 644,
"spirit": 728,
"modi": 497,
"took": 775,
"office": 532,
"half": 351,
"renewal": 665,
"rightful": 683,
"affairs": 45,
"fastest": 311,
"growing": 349,
"major": 459,
"economy": 261,
"driving": 251,
"premised": 590,
"motto": 500,
"sabka": 689,
"saath": 688,
"vikas": 816,
"places": 572,
"emphasis": 270,
"welfare": 829,
"people": 566,
"africa": 48,
"region": 656,
"bonds": 108,
"solidarity": 722,
"born": 110,
"colonialism": 144,
"belief": 100,
"prosperity": 615,
"month": 498,
"host": 371,
"forum": 326,
"54": 26,
"african": 49,
"invited": 407,
"nuclear": 526,
"aware": 91,
"responsibility": 675,
"discriminatory": 241,
"verifiable": 813,
"disarmament": 238,
"undiminished": 802,
"life": 439,
"milestone": 486,
"look": 450,
"reflect": 649,
"achieved": 34,
"lost": 453,
"similarly": 712,
"associated": 83,
"provides": 622,
"analyse": 59,
"fulfilled": 331,
"purpose": 624,
"set": 703,
"question": 629,
"affirmative": 46,
"answer": 64,
"questions": 630,
"negative": 513,
"preventing": 597,
"decolonization": 214,
"dismantling": 246,
"apartheid": 65,
"combating": 146,
"hunger": 378,
"promoting": 611,
"democracy": 222,
"rights": 684,
"able": 31,
"taking": 754,
"various": 812,
"permanent": 568,
"path": 560,
"treading": 788,
"according": 33,
"parameters": 551,
"appears": 67,
"ineffective": 395,
"maintaining": 458,
"failed": 309,
"effectively": 264,
"area": 71,
"started": 731,
"reach": 635,
"enthusiastically": 281,
"partners": 556,
"focused": 322,
"foremost": 324,
"immediate": 382,
"results": 680,
"truly": 792,
"transformational": 785,
"reached": 636,
"extended": 302,
"renewing": 666,
"ancient": 60,
"linkages": 443,
"constructing": 169,
"modern": 496,
"partnerships": 557,
"14": 3,
"pacific": 546,
"island": 410,
"east": 256,
"replaced": 667,
"previous": 598,
"vigorous": 815,
"proactive": 603,
"engagement": 277,
"economically": 260,
"vibrant": 814,
"complements": 153,
"link": 442,
"remain": 662,
"middle": 484,
"process": 605,
"key": 418,
"radicalization": 632,
"continue": 176,
"asolution": 79,
"palestinian": 549,
"qualitatively": 627,
"upgraded": 808,
"relations": 658,
"powers": 588,
"institutions": 402,
"periodic": 567,
"organization": 538,
"meaning": 471,
"continues": 177,
"dominated": 250,
"wealthy": 827,
"influential": 397,
"notion": 524,
"sovereign": 726,
"equality": 287,
"permitted": 569,
"unfair": 803,
"norms": 523,
"fundamental": 332,
"challenge": 127,
"inequity": 396,
"built": 114,
"preserve": 593,
"centrality": 125,
"legitimacy": 433,
"custodian": 207,
"urgent": 809,
"pressing": 595,
"reform": 651,
"hour": 373,
"reflects": 650,
"geopolitical": 337,
"architecture": 70,
"1945": 8,
"does": 248,
"latin": 428,
"america": 58,
"seats": 697,
"members": 478,
"include": 388,
"decision": 213,
"structures": 738,
"business": 115,
"away": 92,
"outdated": 542,
"transparent": 787,
"methods": 483,
"giving": 339,
"balance": 95,
"restore": 678,
"credibility": 200,
"equip": 289,
"confront": 162,
"times": 771,
"leadership": 430,
"excellences": 297,
"mr": 501,
"sam": 695,
"kutesa": 422,
"courtenay": 196,
"rattray": 633,
"decades": 211,
"discussions": 244,
"text": 764,
"negotiations": 514,
"69": 29,
"560": 28,
"step": 737,
"springboard": 729,
"unique": 804,
"significance": 711,
"revitalization": 682,
"invite": 406,
"hall": 352,
"contemplate": 173,
"idea": 379,
"as15": 76,
"45": 25,
"2015a": 13,
"banyan": 96,
"eastern": 257,
"tradition": 778,
"connotes": 165,
"wisdom": 832,
"judgemental": 414,
"encompassing": 275,
"trunk": 793,
"outwards": 545,
"sky": 718,
"earth": 254,
"expansive": 300,
"branches": 111,
"shade": 706,
"relief": 659,
"discussion": 243,
"debate": 210,
"unlike": 806,
"young": 845,
"constant": 166,
"regeneration": 654,
"expansion": 299,
"withers": 834,
"fate": 312,
"awaits": 90,
"renew": 664,
"condemn": 158,
"irrelevance": 409,
"tragic": 781,
"withering": 833,
"easily": 255,
"lose": 452,
"labyrinth": 423,
"creation": 199,
"seize": 700,
"potential": 585,
"reduce": 646,
"gap": 335,
"mighty": 485,
"providing": 623,
"canopy": 117,
"prosperous": 616,
"humankind": 377,
"end": 276,
"statement": 734
}
}
}