topic_modelling / ctfidf_config.json
guibvieira's picture
Add BERTopic model
8823443
{
"ctfidf_model": {
"bm25_weighting": false,
"reduce_frequent_words": false
},
"vectorizer_model": {
"params": {
"analyzer": "word",
"binary": false,
"decode_error": "strict",
"encoding": "utf-8",
"input": "content",
"lowercase": true,
"max_df": 0.9,
"max_features": null,
"min_df": 0.05,
"ngram_range": [
1,
1
],
"stop_words": "english",
"strip_accents": "ascii",
"token_pattern": "(?u)\\b\\w\\w+\\b",
"vocabulary": null
},
"vocab": {
"token": 560,
"crypto": 115,
"announcement": 25,
"come": 93,
"soon": 511,
"good": 219,
"morning": 361,
"community": 97,
"hope": 254,
"day": 123,
"ahead": 16,
"questions": 440,
"need": 367,
"feel": 185,
"free": 197,
"ask": 31,
"channel": 80,
"won": 621,
"going": 216,
"happen": 232,
"don": 146,
"think": 552,
"big": 47,
"team": 541,
"like": 304,
"address": 11,
"regarding": 455,
"recent": 452,
"number": 378,
"event": 165,
"new": 371,
"rewards": 466,
"change": 78,
"left": 298,
"sense": 488,
"receive": 450,
"time": 557,
"understand": 578,
"taken": 535,
"benefits": 44,
"world": 628,
"bitcoin": 51,
"biggest": 48,
"cryptocurrency": 116,
"run": 470,
"says": 478,
"guys": 228,
"doing": 145,
"today": 559,
"read": 445,
"just": 283,
"really": 448,
"glad": 212,
"got": 221,
"win": 616,
"game": 204,
"different": 138,
"bro": 58,
"look": 321,
"yeah": 636,
"swap": 533,
"earn": 153,
"week": 612,
"ago": 13,
"rate": 443,
"sure": 532,
"kindly": 287,
"question": 439,
"sorry": 512,
"message": 350,
"reply": 461,
"mean": 344,
"say": 476,
"hello": 242,
"daily": 119,
"live": 315,
"comment": 96,
"awesome": 39,
"eth": 163,
"holders": 251,
"currently": 118,
"using": 591,
"worth": 630,
"know": 288,
"potential": 420,
"course": 111,
"profits": 433,
"haha": 229,
"buying": 67,
"make": 333,
"wait": 600,
"price": 425,
"far": 180,
"ones": 385,
"used": 588,
"talk": 538,
"group": 223,
"bit": 50,
"waiting": 601,
"news": 372,
"stuff": 525,
"hand": 231,
"people": 398,
"pls": 411,
"choose": 85,
"massive": 339,
"interesting": 272,
"wow": 631,
"cool": 107,
"thought": 555,
"things": 551,
"happy": 236,
"upcoming": 581,
"join": 281,
"discord": 141,
"enjoy": 160,
"way": 608,
"exciting": 170,
"participate": 394,
"pm": 413,
"utc": 593,
"sent": 489,
"friends": 199,
"usdt": 586,
"level": 302,
"minimum": 353,
"reward": 465,
"send": 487,
"late": 291,
"announced": 24,
"working": 626,
"days": 124,
"create": 113,
"multiple": 363,
"accounts": 5,
"hit": 248,
"needed": 368,
"rest": 464,
"luck": 330,
"high": 246,
"thats": 549,
"account": 4,
"having": 238,
"hold": 250,
"said": 473,
"saying": 477,
"current": 117,
"market": 337,
"leave": 297,
"drop": 149,
"makes": 334,
"released": 459,
"coin": 91,
"trading": 567,
"pepe": 399,
"claim": 86,
"june": 282,
"members": 348,
"want": 604,
"use": 587,
"telegram": 543,
"chat": 82,
"trying": 574,
"fake": 178,
"old": 384,
"did": 134,
"release": 458,
"ready": 446,
"consider": 102,
"unique": 579,
"goes": 215,
"coins": 92,
"twitter": 576,
"thing": 550,
"let": 300,
"didn": 135,
"answer": 26,
"btc": 60,
"finally": 187,
"long": 319,
"excited": 169,
"focus": 191,
"positive": 417,
"defi": 127,
"ama": 22,
"hours": 257,
"ways": 609,
"gon": 217,
"na": 364,
"register": 456,
"ecosystem": 157,
"admin": 12,
"forward": 196,
"mate": 340,
"wo": 620,
"later": 292,
"alright": 21,
"check": 83,
"true": 571,
"lost": 325,
"details": 131,
"huge": 258,
"deposit": 130,
"total": 565,
"click": 88,
"information": 269,
"needs": 369,
"tomorrow": 563,
"knows": 290,
"tried": 570,
"work": 625,
"night": 375,
"came": 70,
"link": 307,
"actually": 8,
"welcome": 614,
"protocol": 436,
"users": 590,
"access": 2,
"liquidity": 309,
"platform": 408,
"continue": 104,
"point": 414,
"brother": 59,
"write": 632,
"shows": 497,
"version": 596,
"years": 638,
"based": 41,
"blockchain": 53,
"trust": 572,
"wallet": 602,
"sir": 503,
"hear": 240,
"ve": 595,
"following": 193,
"great": 222,
"project": 434,
"talking": 539,
"million": 351,
"play": 409,
"able": 0,
"money": 358,
"anymore": 27,
"turn": 575,
"social": 508,
"tho": 554,
"received": 451,
"active": 7,
"hard": 237,
"does": 143,
"control": 106,
"payment": 397,
"works": 627,
"took": 564,
"place": 405,
"position": 416,
"solution": 510,
"dear": 126,
"suggest": 528,
"support": 531,
"thank": 547,
"network": 370,
"heard": 241,
"ok": 382,
"trade": 566,
"lol": 318,
"development": 133,
"process": 430,
"building": 64,
"takes": 536,
"face": 175,
"tokens": 561,
"connect": 101,
"chain": 76,
"real": 447,
"family": 179,
"didnt": 136,
"post": 419,
"dont": 147,
"comes": 94,
"hmm": 249,
"try": 573,
"friend": 198,
"easily": 155,
"yea": 635,
"remember": 460,
"tell": 544,
"investing": 274,
"getting": 208,
"lose": 324,
"help": 243,
"dm": 142,
"miss": 355,
"life": 303,
"safe": 472,
"contract": 105,
"marketing": 338,
"max": 342,
"buy": 66,
"sell": 485,
"ca": 68,
"mind": 352,
"likely": 305,
"home": 253,
"quite": 442,
"starting": 520,
"chance": 77,
"added": 10,
"mentioned": 349,
"according": 3,
"correct": 108,
"end": 159,
"start": 518,
"phone": 404,
"times": 558,
"tg": 546,
"ya": 634,
"hi": 245,
"online": 386,
"started": 519,
"nice": 374,
"available": 36,
"funds": 202,
"created": 114,
"wrong": 633,
"looking": 322,
"seeing": 483,
"country": 109,
"super": 529,
"sounds": 513,
"issue": 278,
"stay": 521,
"note": 377,
"interested": 271,
"launch": 294,
"projects": 435,
"pretty": 423,
"early": 152,
"second": 480,
"guide": 226,
"im": 262,
"links": 308,
"date": 122,
"related": 457,
"spot": 517,
"better": 46,
"app": 28,
"email": 158,
"build": 63,
"yes": 639,
"coming": 95,
"pool": 415,
"lots": 327,
"stop": 523,
"playing": 410,
"nft": 373,
"ethereum": 164,
"listed": 311,
"withdrawal": 619,
"person": 402,
"fees": 186,
"higher": 247,
"sold": 509,
"value": 594,
"update": 582,
"tbh": 540,
"space": 514,
"year": 637,
"set": 492,
"allowed": 19,
"list": 310,
"possible": 418,
"thanks": 548,
"share": 493,
"future": 203,
"key": 284,
"successful": 527,
"research": 463,
"contact": 103,
"investment": 275,
"plan": 406,
"smart": 507,
"games": 205,
"absolutely": 1,
"told": 562,
"taking": 537,
"transactions": 569,
"plus": 412,
"volume": 599,
"open": 387,
"reason": 449,
"private": 426,
"website": 611,
"pay": 396,
"bring": 57,
"oh": 381,
"agree": 14,
"looks": 323,
"case": 73,
"rules": 469,
"buddy": 62,
"deal": 125,
"search": 479,
"code": 90,
"lmao": 317,
"worry": 629,
"guy": 227,
"care": 72,
"okay": 383,
"btw": 61,
"learn": 296,
"enter": 161,
"single": 502,
"previous": 424,
"love": 328,
"normal": 376,
"technology": 542,
"right": 467,
"saw": 475,
"listing": 312,
"best": 45,
"wish": 617,
"happened": 233,
"little": 314,
"fine": 189,
"reach": 444,
"company": 98,
"running": 471,
"business": 65,
"earning": 154,
"capital": 71,
"page": 392,
"experience": 172,
"past": 395,
"shit": 494,
"easy": 156,
"investors": 276,
"situation": 505,
"digital": 139,
"follow": 192,
"personal": 403,
"ll": 316,
"known": 289,
"exactly": 166,
"certain": 75,
"perfect": 400,
"media": 346,
"including": 266,
"wan": 603,
"fix": 190,
"fact": 176,
"kind": 285,
"issues": 279,
"option": 390,
"google": 220,
"fee": 184,
"unless": 580,
"imagine": 263,
"idea": 260,
"binance": 49,
"withdraw": 618,
"bot": 54,
"instead": 270,
"wants": 606,
"hey": 244,
"recommend": 454,
"action": 6,
"wanted": 605,
"lets": 301,
"video": 597,
"secure": 481,
"watch": 607,
"opportunity": 389,
"happens": 235,
"bad": 40,
"man": 336,
"weeks": 613,
"ta": 534,
"allow": 18,
"making": 335,
"latest": 293,
"crazy": 112,
"words": 624,
"thinking": 553,
"asking": 33,
"risk": 468,
"feature": 182,
"fun": 201,
"gone": 218,
"data": 121,
"nah": 365,
"transaction": 568,
"period": 401,
"months": 360,
"report": 462,
"product": 431,
"provide": 437,
"fast": 181,
"plans": 407,
"meet": 347,
"selling": 486,
"cause": 74,
"strong": 524,
"recently": 453,
"gives": 210,
"security": 482,
"global": 213,
"basically": 42,
"term": 545,
"invest": 273,
"problem": 428,
"gave": 206,
"amazing": 23,
"power": 421,
"changed": 79,
"expect": 171,
"public": 438,
"means": 345,
"profit": 432,
"definitely": 128,
"increase": 267,
"month": 359,
"simple": 500,
"fair": 177,
"official": 380,
"youtube": 641,
"matter": 341,
"offer": 379,
"exchange": 168,
"id": 259,
"believe": 43,
"order": 391,
"paid": 393,
"add": 9,
"couple": 110,
"yesterday": 640,
"info": 268,
"god": 214,
"save": 474,
"guess": 225,
"directly": 140,
"site": 504,
"bought": 55,
"updated": 583,
"maybe": 343,
"step": 522,
"literally": 313,
"block": 52,
"type": 577,
"low": 329,
"ur": 585,
"lot": 326,
"attention": 35,
"close": 89,
"word": 623,
"important": 265,
"user": 589,
"kinda": 286,
"complete": 99,
"longer": 320,
"simply": 501,
"example": 167,
"web": 610,
"probably": 427,
"clear": 87,
"usually": 592,
"ai": 17,
"holding": 252,
"assets": 34,
"supply": 530,
"moving": 362,
"given": 209,
"happening": 234,
"showing": 496,
"went": 615,
"chart": 81,
"form": 195,
"sign": 498,
"minutes": 354,
"appreciate": 29,
"especially": 162,
"major": 332,
"main": 331,
"launched": 295,
"earlier": 151,
"services": 491,
"short": 495,
"idk": 261,
"break": 56,
"half": 230,
"gets": 207,
"explain": 173,
"fully": 200,
"problems": 429,
"head": 239,
"hour": 256,
"seen": 484,
"forget": 194,
"near": 366,
"financial": 188,
"away": 38,
"damn": 120,
"hopefully": 255,
"depends": 129,
"opinion": 388,
"grow": 224,
"small": 506,
"imo": 264,
"giving": 211,
"called": 69,
"job": 280,
"features": 183,
"updates": 584,
"checked": 84,
"limited": 306,
"dude": 150,
"extra": 174,
"visit": 598,
"doubt": 148,
"completely": 100,
"service": 490,
"dev": 132,
"avoid": 37,
"doesn": 144,
"isn": 277,
"zero": 642,
"ah": 15,
"moment": 357,
"success": 526,
"legit": 299,
"specific": 516,
"alot": 20,
"missed": 356,
"article": 30,
"special": 515,
"till": 556,
"asked": 32,
"difference": 137,
"quick": 441,
"wonder": 622,
"ppl": 422,
"similar": 499
}
}
}