{
"topic_representations": {
"-1": [
[
"evaluation",
0.6230688553227475
],
[
"claim",
0.5968246831891744
],
[
"reasoning",
0.5754221015746908
],
[
"parameters",
0.517542883360015
],
[
"university",
0.5135697637359796
],
[
"argumentative",
0.5135697637359796
],
[
"repositoryhttpsgithubcomhuntlaboratorylanguagemodeloptimization",
0.5135697637359796
],
[
"review",
0.5135697637359796
],
[
"gptneo27bhttpshuggingfacecoeleutheraigptneo27b",
0.5135697637359796
],
[
"projecthttpsgithubcomhuntlaboratorylanguagemodeloptimization",
0.5135697637359796
]
],
"0": [
[
"checkpoint",
0.37175879363307746
],
[
"fairly",
0.3515890274807403
],
[
"characterized",
0.3515890274807403
],
[
"even",
0.35086147648416083
],
[
"sectionhttpshuggingfacecobertbaseuncased",
0.3479922000487333
],
[
"snippet",
0.3479922000487333
],
[
"try",
0.3479922000487333
],
[
"limitation",
0.34725276087685114
],
[
"particular",
0.3465181958462063
],
[
"could",
0.3452033650501046
]
],
"1": [
[
"generative",
0.548511275172796
],
[
"research",
0.5179454603309872
],
[
"uses",
0.4725663936926501
],
[
"processes",
0.47110219358638483
],
[
"artistic",
0.47110219358638483
],
[
"probing",
0.47110219358638483
],
[
"creative",
0.47110219358638483
],
[
"design",
0.47110219358638483
],
[
"tools",
0.47110219358638483
],
[
"educational",
0.47110219358638483
]
],
"2": [
[
"checkpoint",
0.3814770760817889
],
[
"try",
0.3570891912912861
],
[
"snippet",
0.3570891912912861
],
[
"sectionhttpshuggingfacecobertbaseuncased",
0.3570891912912861
],
[
"limitation",
0.3563304221698531
],
[
"particular",
0.3555766546063644
],
[
"fairly",
0.35261038076997664
],
[
"characterized",
0.35261038076997664
],
[
"even",
0.3518807162643131
],
[
"present",
0.35043527354806714
]
],
"3": [
[
"meant",
0.9976049477912707
],
[
"technical",
0.9976049477912707
],
[
"sociotechnical",
0.9976049477912707
],
[
"convey",
0.9976049477912707
],
[
"needed",
0.9872038703943972
],
[
"section",
0.9712653235792772
],
[
"both",
0.936739855710452
],
[
"risks",
0.9068075576218514
],
[
"information",
0.9018883381886229
],
[
"more",
0.8122384634629694
]
],
"4": [
[
"gpt2",
0.4932675297731254
],
[
"team",
0.4582824401382136
],
[
"their",
0.4041671222778528
],
[
"cardhttpsgithubcomopenaigpt2blobmastermodelcardmd",
0.4027027523328499
],
[
"worked",
0.4000615700284105
],
[
"man",
0.4000615700284105
],
[
"examples",
0.3826810158367596
],
[
"card",
0.37841251284183997
],
[
"releasing",
0.37020691048768467
],
[
"generatedtext",
0.36633590684014183
]
],
"5": [
[
"datasets",
0.4655852272500585
],
[
"internet",
0.4632180977092728
],
[
"unfiltered",
0.4632180977092728
],
[
"therefore",
0.4572786367109269
],
[
"lot",
0.45052751090806786
],
[
"far",
0.44843349146591505
],
[
"least",
0.43181001148070325
],
[
"from",
0.4317049782136603
],
[
"spanish",
0.4228812607169984
],
[
"contains",
0.4189869183810361
]
],
"6": [
[
"dacy",
0.5585722925848415
],
[
"danish",
0.5448223053975801
],
[
"pipelines",
0.4762154576109096
],
[
"transformer",
0.45909551554311984
],
[
"bert",
0.4560723670964845
],
[
"stateoftheart",
0.43890761608742057
],
[
"vectors",
0.4171033873896881
],
[
"entropybased",
0.4171033873896881
],
[
"morphologizer",
0.4171033873896881
],
[
"ner",
0.4171033873896881
]
],
"7": [
[
"your",
0.5779547008577203
],
[
"pythia",
0.533302725435212
],
[
"branch",
0.533302725435212
],
[
"checkpoints",
0.533302725435212
],
[
"provide",
0.5255179253814468
],
[
"you",
0.5017001021320695
],
[
"face",
0.49279688086107165
],
[
"hugging",
0.49279688086107165
],
[
"intended",
0.4649625117440713
],
[
"use",
0.457852805651761
]
],
"8": [
[
"opt",
0.3938333445473251
],
[
"trained",
0.3929995606746999
],
[
"large",
0.3894606240300861
],
[
"software",
0.37368561490751695
],
[
"code",
0.3692783616071311
],
[
"impact",
0.35450930158449734
],
[
"to",
0.3501577946670958
],
[
"limited",
0.3497691863778163
],
[
"aim",
0.3497691863778163
],
[
"while",
0.34819943887361066
]
],
"9": [
[
"al",
0.8638378408615067
],
[
"et",
0.8578829364103318
],
[
"identity",
0.742895984959117
],
[
"occupational",
0.742895984959117
],
[
"groups",
0.742895984959117
],
[
"protected",
0.742895984959117
],
[
"characteristics",
0.742895984959117
],
[
"across",
0.7323536580412874
],
[
"social",
0.7323536580412874
],
[
"classes",
0.7323536580412874
]
]
},
"topics": [
1,
1,
1,
0,
4,
3,
2,
8,
1,
8,
0,
0,
1,
7,
4,
0,
1,
2,
5,
1,
8,
4,
4,
1,
1,
0,
8,
5,
6,
0,
5,
0,
0,
5,
0,
0,
-1,
0,
8,
0,
7,
2,
0,
-1,
4,
0,
0,
3,
0,
0,
8,
0,
2,
5,
3,
8,
1,
0,
0,
0,
9,
8,
6,
1,
3,
0,
0,
7,
5,
0,
6,
4,
0,
6,
1,
1,
0,
4,
8,
0,
1,
3,
3,
1,
8,
-1,
2,
2,
5,
1,
2,
4,
0,
0,
2,
1,
0,
0,
0,
0,
6,
0,
0,
0,
0,
-1,
1,
1,
0,
0,
9,
0,
8,
5,
1,
3,
0,
0,
7,
4,
0,
5,
9,
1,
3,
7,
7,
0,
1,
0,
2,
0,
2,
4,
7,
0,
0,
8,
0,
0,
6,
-1,
0,
0,
1,
3,
5,
0,
4,
0,
0,
1,
4,
7,
3,
1,
0,
4,
8,
0,
0,
0,
6,
-1,
0,
1,
9,
2,
1,
0,
6,
0,
0,
4,
1,
0,
9,
1,
1,
6,
3,
5,
2,
2,
2,
6,
-1,
2,
-1,
2,
0,
5,
2,
4,
2,
5,
6,
0,
3,
0,
0,
9,
5,
0,
0,
1,
3,
0,
4,
2,
0,
0,
0,
4,
9,
3,
0,
7,
0,
0,
4,
0,
3,
8,
0,
0,
1,
1,
3,
0,
3,
6,
3,
-1,
0,
1,
2,
0,
0,
0,
1,
0,
6,
3,
4,
4,
0,
7,
-1,
6,
0,
1,
2,
0,
1,
7,
9,
4,
1,
-1,
0,
0,
1,
7,
0,
0,
0,
5,
0,
9,
4,
1,
7,
4,
1,
0,
0,
5,
0,
2,
0,
0,
8,
-1,
0,
9,
0,
6,
0,
0,
0,
3,
6,
9,
0,
0,
3,
3,
0,
1,
9,
0,
3,
3,
0,
5,
4,
0,
5,
3,
1,
5,
6,
0,
0,
0,
0,
0,
0,
0,
3,
0,
-1,
5,
3,
2,
0,
6,
2,
2,
9,
0,
0,
0,
0,
3,
1,
0,
5,
4,
0,
5,
6,
0,
4,
0,
3,
4,
1,
0,
7,
2,
2,
5,
7,
2,
3,
2,
2,
2,
0,
0,
1,
6,
1,
0,
5,
0,
3,
0,
1,
0,
0,
3,
5,
2,
0
],
"topic_sizes": {
"0": 137,
"1": 48,
"2": 32,
"3": 32,
"4": 27,
"5": 25,
"6": 20,
"7": 15,
"8": 15,
"-1": 13,
"9": 13
},
"topic_mapper": [
[
-1,
-1,
-1
],
[
0,
0,
0
],
[
1,
1,
2
],
[
2,
2,
7
],
[
3,
3,
9
],
[
4,
4,
6
],
[
5,
5,
5
],
[
6,
6,
3
],
[
7,
7,
1
],
[
8,
8,
8
],
[
9,
9,
4
]
],
"topic_labels": {
"-1": "-1_evaluation_claim_reasoning_parameters",
"0": "0_checkpoint_fairly_characterized_even",
"1": "1_generative_research_uses_processes",
"2": "2_checkpoint_try_snippet_sectionhttpshuggingfacecobertbaseuncased",
"3": "3_meant_technical_sociotechnical_convey",
"4": "4_gpt2_team_their_cardhttpsgithubcomopenaigpt2blobmastermodelcardmd",
"5": "5_datasets_internet_unfiltered_therefore",
"6": "6_dacy_danish_pipelines_transformer",
"7": "7_your_pythia_branch_checkpoints",
"8": "8_opt_trained_large_software",
"9": "9_al_et_identity_occupational"
},
"custom_labels": null,
"_outliers": 1
}