alpertml commited on
Commit
76dd92b
1 Parent(s): 06b4325

Upload 15 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/_20news_df_output_doc_topic_CTM.csv filter=lfs diff=lfs merge=lfs -text
37
+ data/_20news_df_output_doc_topic_LDA.csv filter=lfs diff=lfs merge=lfs -text
38
+ data/_20news_df_output_doc_topic_NMF.csv filter=lfs diff=lfs merge=lfs -text
39
+ data/_20news_df_output_doc_topic_Top2Vec.csv filter=lfs diff=lfs merge=lfs -text
40
+ data/sample_text.csv filter=lfs diff=lfs merge=lfs -text
data/_20news_df_output_clusterId_label_words.csv ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ id_cluster,label_class,list_topic_words
2
+ -1,#,"['ax', 'line', 'subject', 'organization', 'one', 'would', 'writes', 'article', 'posting', 'university', 'like', 'host', 'get', 'nntp', 'know', 'max', 'time', 'system', 'people', 'use']"
3
+ 0,talk.politics.mideast,"['israel', 'israeli', 'arab', 'jew', 'palestinian', 'jewish', 'people', 'right', 'peace', 'writes', 'war', 'article', 'would', 'state', 'jake', 'policy', 'civilian', 'one', 'subject', 'country']"
4
+ 1,rec.sport.baseball,"['game', 'player', 'baseball', 'year', 'pitcher', 'hit', 'team', 'hitter', 'run', 'writes', 'article', 'line', 'pitch', 'ball', 'clutch', 'organization', 'subject', 'last', 'season', 'base']"
5
+ 2,rec.motorcycles,"['bike', 'dod', 'dog', 'motorcycle', 'ride', 'helmet', 'riding', 'line', 'organization', 'rider', 'writes', 'subject', 'article', 'tire', 'countersteering', 'one', 'like', 'bmw', 'advice', 'posting']"
6
+ 3,comp.os.ms-windows.misc,"['window', 'do', 'mouse', 'microsoft', 'file', 'line', 'nt', 'subject', 'application', 'system', 'organization', 'pc', 'win', 'user', 'software', 'manager', 'com', 'program', 'run', 'use']"
7
+ 4,talk.politics.guns,"['president', 'stephanopoulos', 'mr', 'fbi', 'fire', 'think', 'myers', 'batf', 'people', 'would', 'koresh', 'atf', 'going', 'waco', 'know', 'said', 'compound', 'writes', 'gas', 'child']"
8
+ 5,rec.sport.hockey,"['team', 'game', 'hockey', 'player', 'nhl', 'blue', 'goal', 'pit', 'det', 'gm', 'play', 'leaf', 'playoff', 'league', 'chi', 'wing', 'win', 'bos', 'season', 'tor']"
9
+ 6,soc.religion.christian,"['god', 'jesus', 'church', 'sin', 'christian', 'christ', 'bible', 'mary', 'hell', 'law', 'faith', 'one', 'heaven', 'catholic', 'say', 'would', 'people', 'believe', 'eternal', 'subject']"
10
+ 7,comp.sys.ibm.pc.hardware,"['drive', 'scsi', 'ide', 'mb', 'disk', 'controller', 'bus', 'hard', 'pc', 'system', 'floppy', 'do', 'mac', 'line', 'bit', 'subject', 'organization', 'meg', 'card', 'problem']"
11
+ 8,sci.crypt,"['key', 'encryption', 'clipper', 'chip', 'escrow', 'government', 'phone', 'algorithm', 'de', 'system', 'security', 'nsa', 'bit', 'would', 'public', 'use', 'line', 'encrypted', 'crypto', 'law']"
12
+ 9,talk.politics.misc,"['homosexual', 'gay', 'cramer', 'homosexuality', 'sex', 'sexual', 'clayton', 'men', 'people', 'marriage', 'christian', 'study', 'sin', 'god', 'male', 'writes', 'article', 'one', 'would', 'love']"
13
+ 10,rec.autos,"['car', 'dealer', 'engine', 'ford', 'price', 'writes', 'saturn', 'subject', 'line', 'organization', 'article', 'diesel', 'mustang', 'auto', 'would', 'like', 'new', 'automatic', 'transmission', 'university']"
14
+ 11,sci.med,"['candida', 'patient', 'vitamin', 'gordon', 'doctor', 'yeast', 'geb', 'disease', 'bank', 'infection', 'quack', 'treatment', 'pain', 'kidney', 'physician', 'stone', 'medical', 'diet', 'migraine', 'bloom']"
15
+ 12,alt.atheism,"['god', 'atheist', 'belief', 'atheism', 'christian', 'believe', 'truth', 'faith', 'absolute', 'say', 'religion', 'existence', 'people', 'one', 'reason', 'exist', 'jesus', 'would', 'think', 'evidence']"
16
+ 13,talk.politics.guns,"['gun', 'firearm', 'weapon', 'right', 'militia', 'people', 'handgun', 'amendment', 'crime', 'control', 'state', 'would', 'criminal', 'arm', 'one', 'well', 'law', 'revolver', 'police', 'like']"
17
+ 14,comp.sys.mac.hardware,"['monitor', 'video', 'vga', 'line', 'color', 'screen', 'computer', 'subject', 'resolution', 'organization', 'horizontal', 'apple', 'problem', 'mac', 'atari', 'sync', 'university', 'nec', 'mode', 'power']"
18
+ 15,comp.graphics,"['jpeg', 'image', 'color', 'gif', 'file', 'bit', 'format', 'program', 'display', 'version', 'viewer', 'pixel', 'quality', 'convert', 'jfif', 'colormap', 'colour', 'use', 'graphic', 'window']"
19
+ 16,sci.space,"['space', 'mission', 'orbit', 'hst', 'nasa', 'shuttle', 'probe', 'satellite', 'earth', 'data', 'spacecraft', 'lunar', 'baalke', 'solar', 'image', 'propulsion', 'jupiter', 'mar', 'launch', 'planetary']"
20
+ 17,misc.forsale,"['sound', 'audio', 'amp', 'stereo', 'channel', 'speaker', 'sale', 'line', 'voltage', 'relay', 'subject', 'organization', 'midi', 'input', 'cd', 'output', 'switch', 'amplifier', 'circuit', 'signal']"
21
+ 18,comp.os.ms-windows.misc,"['printer', 'print', 'deskjet', 'hp', 'laser', 'ink', 'bubblejet', 'bj', 'printing', 'canon', 'driver', 'line', 'subject', 'organization', 'postscript', 'cartridge', 'page', 'university', 'paper', 'problem']"
22
+ 19,talk.politics.mideast,"['armenian', 'turkish', 'armenia', 'turk', 'turkey', 'greek', 'argic', 'serdar', 'muslim', 'soviet', 'azeri', 'people', 'genocide', 'serum', 'russian', 'village', 'greece', 'istanbul', 'tartar', 'azerbaijan']"
data/_20news_df_output_doc_topic_CTM.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04dc9cd291442e9836d3e2d5836c27034a4a9cd2230b4269a26964c254a75a70
3
+ size 10527298
data/_20news_df_output_doc_topic_CTM_LIST.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ id_cluster,label_class,list_topic_words
2
+ 0,sci.space,"['health', 'patient', 'medical', 'space', 'disease', 'study', 'year', 'treatment', 'drug', 'april']"
3
+ 1,misc.forsale,"['notice', 'passed', 'suggested', 'higher', 'dark', 'background', 'larger', 'choice', 'requirement', 'discus']"
4
+ 2,comp.windows.x,"['pub', 'ftp', 'edu', 'format', 'archive', 'image', 'graphic', 'version', 'faq', 'site']"
5
+ 3,talk.politics.guns,"['atf', 'fire', 'waco', 'fbi', 'disclaimer', 'koresh', 'gas', 'survivor', 'reserve', 'bank']"
6
+ 4,talk.politics.mideast,"['people', 'right', 'article', 'atheism', 'homosexual', 'would', 'israel', 'writes', 'one', 'argument']"
7
+ 5,soc.religion.christian,"['trouble', 'suggested', 'notice', 'avoid', 'count', 'choice', 'higher', 'consideration', 'changed', 'passed']"
8
+ 6,rec.autos,"['car', 'engine', 'bike', 'dod', 'ride', 'mile', 'auto', 'oil', 'ford', 'motorcycle']"
9
+ 7,talk.politics.mideast,"['soviet', 'turkish', 'armenia', 'turk', 'armenian', 'genocide', 'muslim', 'soldier', 'people', 'russian']"
10
+ 8,soc.religion.christian,"['jesus', 'god', 'church', 'one', 'bible', 'christ', 'scripture', 'spirit', 'passage', 'sin']"
11
+ 9,comp.os.ms-windows.misc,"['max', 'mi', 'md', 'window', 'pl', 'tm', 'ex', 'id', 'ah', 'mb']"
12
+ 10,alt.atheism,"['objective', 'morality', 'moral', 'god', 'christianity', 'exist', 'faith', 'writes', 'christian', 'atheist']"
13
+ 11,comp.windows.x,"['window', 'application', 'manager', 'font', 'help', 'thanks', 'screen', 'mouse', 'motif', 'graphic']"
14
+ 12,comp.sys.ibm.pc.hardware,"['card', 'board', 'monitor', 'video', 'thanks', 'port', 'offer', 'driver', 'modem', 'drive']"
15
+ 13,rec.sport.hockey,"['red', 'boston', 'leaf', 'fan', 'playoff', 'blue', 'lost', 'baseball', 'game', 'ranger']"
16
+ 14,talk.politics.guns,"['would', 'think', 'people', 'president', 'know', 'going', 'get', 'time', 'fbi', 'right']"
17
+ 15,comp.sys.ibm.pc.hardware,"['scsi', 'drive', 'ide', 'bus', 'disk', 'controller', 'mac', 'problem', 'card', 'mb']"
18
+ 16,sci.crypt,"['chip', 'encryption', 'clipper', 'escrow', 'wiretap', 'crypto', 'key', 'secret', 'secure', 'nsa']"
19
+ 17,misc.forsale,"['ac', 'sale', 'shipping', 'email', 'looking', 'distribution', 'asking', 'usa', 'express', 'nntp']"
20
+ 18,misc.forsale,"['nntp', 'posting', 'host', 'distribution', 'usa', 'sale', 'offer', 'interested', 'ca', 'ohio']"
21
+ 19,rec.sport.hockey,"['year', 'game', 'player', 'team', 'appears', 'season', 'hockey', 'league', 'play', 'st']"
data/_20news_df_output_doc_topic_LDA.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b87675ff2422b7c0411266320a5cca316bed5c102427b06610fdb35eec5ceac
3
+ size 13063689
data/_20news_df_output_doc_topic_LDA_LIST.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ id_cluster,label_class,list_topic_words
2
+ 0,soc.religion.christian,"['god', 'line', 'output', 'lord', 'christ', 'would', 'book', 'problem', 'subject', 'organization']"
3
+ 1,comp.sys.mac.hardware,"['drive', 'system', 'disk', 'mac', 'hard', 'problem', 'get', 'scsi', 'also', 'card']"
4
+ 2,comp.graphics,"['graphic', 'file', 'pub', 'mail', 'ray', 'send', 'edu', 'object', 'archive', 'output']"
5
+ 3,comp.os.ms-windows.misc,"['max', 'pl', 'tm', 'mb', 'mr', 'mi', 'ex', 'ah', 'au', 'de']"
6
+ 4,sci.electronics,"['one', 'use', 'get', 'two', 'see', 'also', 'new', 'time', 'first', 'year']"
7
+ 5,comp.graphics,"['image', 'gif', 'color', 'format', 'quality', 'software', 'pixel', 'free', 'tool', 'version']"
8
+ 6,comp.windows.x,"['available', 'ftp', 'edu', 'pub', 'version', 'format', 'graphic', 'package', 'source', 'contact']"
9
+ 7,sci.space,"['system', 'data', 'drive', 'disk', 'support', 'feature', 'software', 'space', 'user', 'rom']"
10
+ 8,comp.windows.x,"['data', 'image', 'available', 'also', 'information', 'sun', 'based', 'set', 'tool', 'motif']"
11
+ 9,rec.sport.hockey,"['would', 'know', 'writes', 'article', 'think', 'line', 'organization', 'subject', 'year', 'like']"
12
+ 10,sci.crypt,"['state', 'law', 'use', 'information', 'question', 'new', 'american', 'must', 'example', 'may']"
13
+ 11,soc.religion.christian,"['jesus', 'said', 'people', 'say', 'know', 'one', 'armenian', 'day', 'lord', 'come']"
14
+ 12,comp.graphics,"['file', 'image', 'program', 'gif', 'format', 'gun', 'color', 'output', 'section', 'may']"
15
+ 13,misc.forsale,"['line', 'organization', 'subject', 'writes', 'article', 'host', 'nntp', 'people', 'world', 'university']"
16
+ 14,alt.atheism,"['god', 'atheist', 'atheism', 'religion', 'religious', 'christian', 'belief', 'many', 'believe', 'one']"
17
+ 15,comp.windows.x,"['mail', 'computer', 'internet', 'server', 'anonymous', 'posting', 'system', 'user', 'privacy', 'information']"
18
+ 16,comp.os.ms-windows.misc,"['pl', 'md', 'sc', 'ah', 'id', 'wa', 'mr', 'st', 'gif', 'tm']"
19
+ 17,comp.os.ms-windows.misc,"['do', 'window', 'file', 'microsoft', 'program', 'mouse', 'output', 'font', 'version', 'server']"
20
+ 18,comp.graphics,"['bit', 'use', 'color', 'window', 'driver', 'mac', 'program', 'version', 'scsi', 'hardware']"
21
+ 19,talk.politics.guns,"['people', 'would', 'one', 'think', 'make', 'president', 'know', 'mr', 'well', 'even']"
data/_20news_df_output_doc_topic_NMF.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b33a5baa539dcdc12d23c65bec27d9aa26473e1b8bae3629a2384f2a4b120cee
3
+ size 13068868
data/_20news_df_output_doc_topic_NMF_LIST.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ id_cluster,label_class,list_topic_words
2
+ 0,soc.religion.christian,"['god', 'line', 'output', 'lord', 'christ', 'would', 'book', 'problem', 'subject', 'organization']"
3
+ 1,comp.sys.mac.hardware,"['drive', 'system', 'disk', 'mac', 'hard', 'problem', 'get', 'scsi', 'also', 'card']"
4
+ 2,comp.graphics,"['graphic', 'file', 'pub', 'mail', 'ray', 'send', 'edu', 'object', 'archive', 'output']"
5
+ 3,comp.os.ms-windows.misc,"['max', 'pl', 'tm', 'mb', 'mr', 'mi', 'ex', 'ah', 'au', 'de']"
6
+ 4,sci.electronics,"['one', 'use', 'get', 'two', 'see', 'also', 'new', 'time', 'first', 'year']"
7
+ 5,comp.graphics,"['image', 'gif', 'color', 'format', 'quality', 'software', 'pixel', 'free', 'tool', 'version']"
8
+ 6,comp.windows.x,"['available', 'ftp', 'edu', 'pub', 'version', 'format', 'graphic', 'package', 'source', 'contact']"
9
+ 7,sci.space,"['system', 'data', 'drive', 'disk', 'support', 'feature', 'software', 'space', 'user', 'rom']"
10
+ 8,comp.windows.x,"['data', 'image', 'available', 'also', 'information', 'sun', 'based', 'set', 'tool', 'motif']"
11
+ 9,rec.sport.hockey,"['would', 'know', 'writes', 'article', 'think', 'line', 'organization', 'subject', 'year', 'like']"
12
+ 10,sci.crypt,"['state', 'law', 'use', 'information', 'question', 'new', 'american', 'must', 'example', 'may']"
13
+ 11,soc.religion.christian,"['jesus', 'said', 'people', 'say', 'know', 'one', 'armenian', 'day', 'lord', 'come']"
14
+ 12,comp.graphics,"['file', 'image', 'program', 'gif', 'format', 'gun', 'color', 'output', 'section', 'may']"
15
+ 13,misc.forsale,"['line', 'organization', 'subject', 'writes', 'article', 'host', 'nntp', 'people', 'world', 'university']"
16
+ 14,alt.atheism,"['god', 'atheist', 'atheism', 'religion', 'religious', 'christian', 'belief', 'many', 'believe', 'one']"
17
+ 15,comp.windows.x,"['mail', 'computer', 'internet', 'server', 'anonymous', 'posting', 'system', 'user', 'privacy', 'information']"
18
+ 16,comp.os.ms-windows.misc,"['pl', 'md', 'sc', 'ah', 'id', 'wa', 'mr', 'st', 'gif', 'tm']"
19
+ 17,comp.os.ms-windows.misc,"['do', 'window', 'file', 'microsoft', 'program', 'mouse', 'output', 'font', 'version', 'server']"
20
+ 18,comp.graphics,"['bit', 'use', 'color', 'window', 'driver', 'mac', 'program', 'version', 'scsi', 'hardware']"
21
+ 19,talk.politics.guns,"['people', 'would', 'one', 'think', 'make', 'president', 'know', 'mr', 'well', 'even']"
data/_20news_df_output_doc_topic_Top2Vec.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:838c4667500d7d544ef1f6732d5c4017371618ce92573790953c7d53c4a210d6
3
+ size 21296146
data/_20news_df_output_doc_topic_Top2Vec_LIST.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ id_cluster,label_class,list_topic_words
2
+ 0,soc.religion.christian,"['bible', 'god', 'jesus', 'religion', 'scripture', 'christianity', 'christ', 'atheist', 'prophet', 'christian', 'religious', 'church', 'muslim', 'theist', 'quran', 'verse', 'doctrine', 'gospel', 'belief', 'holy', 'testament', 'psalm', 'islam', 'apostle', 'faith', 'believer', 'theology', 'heresy', 'congregation', 'atheism', 'interpretation', 'disciple', 'revelation', 'catholic', 'armenian', 'salvation', 'truth', 'corinthian', 'khomeini', 'baptism', 'sin', 'persecution', 'teaching', 'turkish', 'passage', 'hebrew', 'biblical', 'isaiah']"
3
+ 1,comp.windows.x,"['window', 'olwmxterm', 'open', 'windows', 'default', 's', 'pointer', 'menu', 'intrinsics', 'client', 'xdm', 'widget', 'openwin', 'bitmap', 'colormap', 'xlib', 'ncd', 'screen', 'file', 'server', 'icon', 'manager', 'display', 'application', 'directory', 'button', 'config', 'default', 'ini', 'pixmap', 'openlook', 'font', 'sunos', 'paste', 'exit', 'sparc', 'mwm', 'console', 'xaw', 'ctrl', 'twm', 'xpert', 'ndet', 'running', 'program', 'args', 'mouse', 'fprintf', 'motif', 'contrib', 'exe']"
4
+ 2,comp.sys.mac.hardware,"['apple', 'sale', 'shipping', 'powerbook', 'coprocessor', 'lc', 'iii', 'price', 'simm', 'brand', 'offer', 'iis', 'ifp', 'ui', 'cic', 'entris', 'nubus', 'mac', 'for', 'sale', 'slot', 'vram', 'printer', 'simm', 'selling', 'quadra', 'bought', 'sell', 'upgrade', 'monitor', 'adapter', 'cartridge', 'duo', 'external', 'purchased', 'lc', 'plus', 'asking', 'buying', 'sony', 'portable', 'interested', 'buyer', 'dpi', 'ethernet', 'obo', 'codram', 'deskjet', 'laserjet', 'modem', 'mhz', 'hanover']"
5
+ 3,talk.politics.misc,"['gun', 'firearm', 'criminal', 'handgun', 'crime', 'nra', 'constitutional', 'democrat', 'homicide', 'politician', 'abiding', 'cramer', 'clayton', 'brady', 'liberal', 'homosexual', 'amendment', 'rkb', 'federal', 'voter', 'clinton', 'constitution', 'economic', 'social', 'shotgun', 'bang', 'government', 'tax', 'opti', 'link', 'veal', 'gay', 'liberty', 'abuse', 'citizen', 'atheist', 'legal', 'self', 'violent', 'minority', 'petaluma', 'privacy', 'drug', 'majority', 'cipriani', 'enforce', 'election', 'freedom', 'heterosexual', 'jail', 'health']"
6
+ 4,rec.sport.hockey,"['playoff', 'hockey', 'bruin', 'nhl', 'goalie', 'bure', 'goaltender', 'mogilny', 'blackhawks', 'islander', 'lafontaine', 'potvin', 'canadien', 'lemieux', 'powerplay', 'quebec', 'gilmour', 'canuck', 'penguin', 'whaler', 'elanne', 'defenseman', 'puck', 'shanahan', 'overtime', 'yzerman', 'chhabra', 'fuhr', 'habs', 'recchi', 'barrasso', 'gretzky', 'cup', 'stanley', 'domi', 'oderstrom', 'adirondack', 'oiler', 'sabre', 'probert', 'chelios', 'messier', 'rauser', 'flyer', 'isle', 'skate', 'edmonton', 'tournament', 'dineen', 'devil']"
7
+ 5,sci.electronics,"['championship', 'mail', 'hockey', 'email', 'sweden', 'nhl', 'canada', 'please', 'hello', 'reply', 'internet', 'motorcycle', 'histaff', 'an', 'space', 'tournament', 'subject', 'appreciate', 'finland', 'posciqw', 'sale', 'playoff', 'thanks', 'university', 'appreciated', 'posting', 'carnegie', 'xj', 'mailing', 'fax', 'send', 'honda', 'engineering', 'computer', 'news', 'cmu', 'moa', 'computing', 'thanks', 'yr', 'usa', 'advance', 'hv', 'net', 'news', 'post', 'ua', 'phone']"
8
+ 6,sci.electronics,"['amp', 'circuit', 'voltage', 'capacitor', 'amplifier', 'transistor', 'signal', 'receiver', 'ohm', 'transmitter', 'frequency', 'radio', 'antenna', 'shack', 'transformer', 'volt', 'tube', 'audio', 'stereo', 'khz', 'metal', 'resistor', 'watt', 'pulse', 'grounding', 'meter', 'electronics', 'light', 'detector', 'analog', 'filter', 'connector', 'satellite', 'orbit', 'relay', 'conductor', 'noise', 'dtm', 'edinttl', 'wiring', 'gfcioorttvvc', 'radar', 'lamp', 'diameter', 'orbiting', 'switch', 'sensor']"
9
+ 7,comp.sys.ibm.pc.hardware,"['ide', 'controller', 'vlb', 'scsi', 'motherboard', 'bus', 'drive', 'eisa', 'isa', 'vga', 'vram', 'max', 'tores', 'di', 'card', 'dma', 'vesa', 'floppy', 'mhz', 'seagate', 'dram', 'meg', 'mf', 'mhdd', 'adapter', 'bios', 'chipset', 'pen', 'evati', 'simm', 'slot', 'ram', 'mb', 'jumpers', 'simms', 'orchid', 'board', 'adaptec', 'nubus', 'dx', 'monitor', 'ultra', 'vga', 'cache', 'hard', 'apple', 'benchmark', 'hd', 'trident', 'toshiba', 'brand']"
10
+ 8,rec.sport.baseball,"['pitching', 'goaltender', 'season', 'defenseman', 'pitcher', 'innings', 'and', 'berg', 'obp', 'playoff', 'rbi', 'scored', 'goalie', 'player', 'team', 'batter', 'batting', 'bullpen', 'nhl', 'whaler', 'alomar', 'shutout', 'rookie', 'recchi', 'gretzky', 'baerga', 'fielder', 'soderstrom', 'mattingly', 'blackhawks', 'hitter', 'game', 'dodger', 'puck', 'orioles', 'nichols', 'hockey', 'sox', 'tampa', 'winfield', 'potvin', 'talent', 'sherri', 'clemens', 'sanderson', 'score', 'yzerman', 'career', 'pitched', 'gilmour', 'winning']"
11
+ 9,alt.atheism,"['atheist', 'morality', 'theist', 'livesey', 'dwyer', 'atheism', 'bob', 'be', 'mozumder', 'beauchaine', 'solntze', 'morals', 'schneider', 'kmr', 'bilw', 'pdo', 'kc', 'forum', 'benedikt', 'jaeger', 'allan', 'agnostic', 'cook', 'amung', 'amotto', 'islam', 'theism', 'religion', 'keith', 'rosenau', 'evolution', 'mathew', 'rushdie', 'conner', 'objective', 'darice', 'rus', 'news', 'gregg', 'khomeini', 'punisher', 'sandvik', 'objectively', 'halat', 'exist', 'specie', 'immoral', 'heal', 'taq', 'quran', 'mantis', 'subjective', 'believer', 'belief', 'islam']"
12
+ 10,misc.forsale,"['membership', 'conference', 'campus', 'siggraph', 'registration', 'hotel', 'tour', 'proceeding', 'information', 'acm', 'servicemail', 'cryptology', 'dinner', 'june', 'students', 'suite', 'electronic', 'participant', 'attend', 'july', 'thursday', 'address', 'avenue', 'interested', 'space', 'project', 'ticket', 'publication', 'tutorial', 'bethesda', 'monday', 'sponsor', 'hill', 'payment', 'offer', 'airport', 'director', 'meeting', 'bulletin', 'weekend', 'participation', 'letter', 'association', 'county', 'fee', 'email', 'newsletter', 'motorcycle', 'schedule']"
13
+ 11,talk.politics.guns,"['fbi', 'atf', 'davidian', 'koresh', 'compound', 'batf', 'waco', 'bdf', 'fire', 'ranch', 'survivor', 'dividian', 'raid', 'grenade', 'tear', 'warrant', 'cnn', 'reno', 'tank', 'armored', 'cult', 'burn', 'agent', 'suicide', 'knock', 'tavares', 'follower', 'jmd', 'stove', 'investigation', 'cdt', 'paranoid', 'janet', 'fed', 'burned', 'inside', 'knocking', 'branch', 'arras', 'assault', 'burning', 'gas', 'started', 'hostage', 'roby', 'building', 'stratus', 'flee', 'happened', 'davidian']"
14
+ 12,rec.motorcycles,"['riding', 'e', 'green', 'motorcycle', 'dod', 'countersteering', 'biker', 'bikers', 'bikes', 'squid', 'ride', 'helmet', 'cousineau', 'drinking', 'cookson', 'rider', 'infant', 'en', 'pet', 'karr', 'pette', 'far', 'jacket', 'mjs', 'blaine', 'puck', 'newbie', 'bgardner', 'pillion', 'playoffs', 'kate', 'coach', 'lean', 'levine', 'balcony', 'drunk', 'boom', 'hockey', 'mamma', 'infield', 'jody', 'harley', 'keeper', 'sorenson', 'sabre', 'nhl', 'azerbaijan', 'iran', 'ckspeedy', 'cruiser', 'wheelie', 'slyuda', 'ama']"
15
+ 13,comp.graphics,"['graphical', 'programming', 'borland', 'gui', 'application', 'motif', 'framework', 'toolkit', 'api', 'builder', 'window', 'interactive', 'interface', 'package', 'library', 'polygon', 'processing', 'tool', 'programmer', 'oriented', 'graphic', 'toolkit', 'microsoft', 'openlook', 'functionality', 'turbo', 'graph', 'modeling', 'routine', 'compiler', 'widget', 'intrinsics', 'drawing', 'osf', 'architecture', 'platform', 'pc', 'networking', 'apps', 'operating', 'fortran', 'looking', 'vertex', 'spline', 'phigs', 'manual', 'unix', 'feature', 'simulation', 'workstation']"
16
+ 14,sci.crypt,"['clipper', 'escrow', 'encryption', 'scheme', 'escrowed', 'nsa', 'chip', 'crypto', 'secure', 'key', 'algorithm', 'encrypted', 'sternlight', 'hellman', 'strnlght', 'wiretap', 'classified', 'decrypt', 'strnlght', 'c', 'excepted', 'p', 'metzger', 'denning', 'secret', 'encrypt', 'plaintext', 'metzger', 'enforcement', 'd', 'sidorothy', 'f', 'ed', 'omission', 'tapped', 'laissez', 'decryption', 'tap', 'device', 'security', 'ensure', 'ciphertext', 'toal', 'serial', 'qualcomm', 'cryptography', 'cryptosystem', 'approval', 'trust', 'agency', 'spook', 'communication', 'gtoal']"
17
+ 15,sci.space,"['orbit', 'prb', 'shuttle', 'spacecraft', 'mission', 'satellite', 'nsmca', 'solar', 'henry', 'launch', 'payload', 'landing', 'flyby', 'zoology', 'orbital', 'orbiter', 'manned', 'ut', 'zoos', 'st', 'galileo', 'lunar', 'nasa', 'pluto', 'space', 'servicing', 'digex', 'moon', 'mccall', 'propulsion', 'astronaut', 'orbiting', 'flight', 'spencer', 'baalke', 'jsc', 'comet', 'trajectory', 'venus', 'funding', 'ssft', 'titan', 'telescope', 'aircraft', 'earth', 'asteroid', 'kipling', 'jbh', 'design', 'shafer', 'magellan']"
18
+ 16,rec.autos,"['toyota', 'brake', 'sedan', 'car', 'civic', 'torque', 'ford', 'shifter', 'wagon', 'engine', 'chevy', 'mustang', 'nissan', 'cruiser', 'rear', 'tire', 'wheel', 'integra', 'jeep', 'liter', 'honda', 'taurus', 'suspension', 'gear', 'dealer', 'mazda', 'clutch', 'sho', 'accord', 'steering', 'braking', 'rotor', 'opel', 'eliot', 'vehicle', 'motorcycle', 'motor', 'shifting', 'camaro', 'convertible', 'dodge', 'bike', 'valve', 'porsche', 'nhl', 'mpg', 'seat', 'saturn', 'mile', 'highway']"
19
+ 17,rec.motorcycles,"['temperature', 'heat', 'oil', 'drain', 'cooling', 'fuel', 'bike', 'battery', 'thermal', 'water', 'dry', 'liquid', 'gas', 'spray', 'behan', 'na', 'steam', 'exhaust', 'pump', 'cleaning', 'discharge', 'solvent', 'wood', 'bolt', 'dir', 'toxygencylinder', 'cold', 'valve', 'atmosphere', 'diesel', 'bottle', 'wax', 'surface', 'motorcycle', 'warm', 'removing', 'metal', 'cement', 'tower', 'concrete', 'tire', 'emission', 'cool', 'hot', 'tear', 'diameter', 'electricity', 'boom', 'air', 'vacuum']"
20
+ 18,talk.politics.mideast,"['israeli', 'israel', 'arab', 'palestinian', 'livni', 'gaza', 'palestine', 'zionist', 'and', 'it', 'clock', 'beyer', 'jake', 'occupied', 'shostack', 'hamas', 'kaufman', 'davidsson', 'jew', 'terrorism', 'jerusalem', 'gazans', 'lebanese', 'cprin', 'hern', 'leman', 'ahezbollah', 'cwr', 'ushai', 'hela', 'propaganda', 'oac', 'lebanon', 'reserve', 'redundancy', 'amoss', 'syrians', 'syria', 'ghetto', 'guday', 'jewish', 'irgun', 'peace', 'jordan', 'eliat', 'territory', 'plo', 'iraq', 'occupation', 'orion']"
21
+ 19,sci.med,"['disease', 'patient', 'doctor', 'physician', 'diagnosis', 'treatment', 'infection', 'vaginal', 'candida', 'vitamin', 'therapy', 'sinus', 'diagnosed', 'medication', 'chronic', 'chastity', 'syndrome', 'symptoms', 'yeast', 'pregnancy', 'tissue', 'clinical', 'medical', 'diet', 'systemic', 'rind', 'intellect', 'shameful', 'bacteria', 'disorder', 'nerve', 'antibiotic', 'lyme', 'gordon', 'surgery', 'medicine', 'illness', 'liver', 'cure', 'pain', 'quack', 'banschbach', 'dietary', 'kidney', 'migraine', 'dose', 'placebo']"
data/_20news_df_output_topic_word_CTM.csv ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_id,method,method_specific_params,dataset,num_given_topics,reduced,topic_num,topic_size,topic_words,word_scores,num_detected_topics,num_final_topics,duration_secs,diversity_unique,diversity_inv_rbo,coherence_npmi,coherence_v,rand_index
2
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,0,732,"['health', 'patient', 'medical', 'space', 'disease', 'study', 'year', 'treatment', 'drug', 'april']","[0.12383451 0.11521751 0.11353351 0.1077095 0.09458103 0.092962
3
+ 0.09117454 0.08762824 0.08683101 0.08652814]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
4
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,1,756,"['notice', 'passed', 'suggested', 'higher', 'dark', 'background', 'larger', 'choice', 'requirement', 'discus']","[0.10493658 0.10426784 0.10232636 0.10052309 0.09887514 0.09880958
5
+ 0.0980336 0.09770604 0.09764653 0.09687528]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
6
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,2,518,"['pub', 'ftp', 'edu', 'format', 'archive', 'image', 'graphic', 'version', 'faq', 'site']","[0.11425608 0.11422319 0.11114949 0.1088886 0.10393708 0.09187336
7
+ 0.09019504 0.08906046 0.08828697 0.08812961]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
8
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,3,1323,"['atf', 'fire', 'waco', 'fbi', 'disclaimer', 'koresh', 'gas', 'survivor', 'reserve', 'bank']","[0.11390629 0.10463645 0.10274681 0.10265718 0.10119981 0.10058824
9
+ 0.09597317 0.0939759 0.0926476 0.09166859]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
10
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,4,478,"['people', 'right', 'article', 'atheism', 'homosexual', 'would', 'israel', 'writes', 'one', 'argument']","[0.12244946 0.10862308 0.10516351 0.10412805 0.10212132 0.10055386
11
+ 0.095933 0.09209377 0.08563491 0.08329905]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
12
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,5,704,"['trouble', 'suggested', 'notice', 'avoid', 'count', 'choice', 'higher', 'consideration', 'changed', 'passed']","[0.10687547 0.1035945 0.10281763 0.10066567 0.09871156 0.09862704
13
+ 0.09789448 0.09716429 0.09714134 0.09650805]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
14
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,6,1061,"['car', 'engine', 'bike', 'dod', 'ride', 'mile', 'auto', 'oil', 'ford', 'motorcycle']","[0.12913983 0.11862411 0.11026718 0.09704107 0.09644363 0.09473452
15
+ 0.0923589 0.09197619 0.08799469 0.08141985]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
16
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,7,518,"['soviet', 'turkish', 'armenia', 'turk', 'armenian', 'genocide', 'muslim', 'soldier', 'people', 'russian']","[0.10896654 0.10893553 0.10683964 0.10593498 0.10491596 0.10097778
17
+ 0.10019729 0.09013389 0.08842223 0.08467618]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
18
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,8,564,"['jesus', 'god', 'church', 'one', 'bible', 'christ', 'scripture', 'spirit', 'passage', 'sin']","[0.11491191 0.11250298 0.11043111 0.10765673 0.1024358 0.09674171
19
+ 0.09472436 0.08755608 0.08664075 0.0863985 ]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
20
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,9,396,"['max', 'mi', 'md', 'window', 'pl', 'tm', 'ex', 'id', 'ah', 'mb']","[0.13491088 0.10957684 0.10768988 0.1076818 0.10544891 0.09252201
21
+ 0.08989577 0.08594737 0.08437467 0.08195181]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
22
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,10,708,"['objective', 'morality', 'moral', 'god', 'christianity', 'exist', 'faith', 'writes', 'christian', 'atheist']","[0.109519 0.10796002 0.10678478 0.1035361 0.10269511 0.09694914
23
+ 0.09588029 0.0944791 0.0914978 0.09069863]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
24
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,11,876,"['window', 'application', 'manager', 'font', 'help', 'thanks', 'screen', 'mouse', 'motif', 'graphic']","[0.13398996 0.1091726 0.10379196 0.10361101 0.10081472 0.09725458
25
+ 0.0934235 0.08959287 0.08454128 0.08380754]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
26
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,12,1038,"['card', 'board', 'monitor', 'video', 'thanks', 'port', 'offer', 'driver', 'modem', 'drive']","[0.11984834 0.1134081 0.10677309 0.09944587 0.0985932 0.0959444
27
+ 0.09349772 0.09153173 0.09102782 0.0899297 ]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
28
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,13,808,"['red', 'boston', 'leaf', 'fan', 'playoff', 'blue', 'lost', 'baseball', 'game', 'ranger']","[0.10695428 0.10541338 0.10267578 0.10250854 0.09960378 0.09924651
29
+ 0.09874782 0.09704522 0.09397282 0.09383183]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
30
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,14,731,"['would', 'think', 'people', 'president', 'know', 'going', 'get', 'time', 'fbi', 'right']","[0.13352683 0.1260272 0.11690333 0.10108264 0.10049071 0.08579428
31
+ 0.08460232 0.08453774 0.08370963 0.08332541]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
32
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,15,713,"['scsi', 'drive', 'ide', 'bus', 'disk', 'controller', 'mac', 'problem', 'card', 'mb']","[0.12090454 0.11668422 0.11019469 0.10720176 0.09680493 0.09506889
33
+ 0.09220924 0.08808973 0.08799659 0.08484541]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
34
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,16,811,"['chip', 'encryption', 'clipper', 'escrow', 'wiretap', 'crypto', 'key', 'secret', 'secure', 'nsa']","[0.11366736 0.10840231 0.10831383 0.09918499 0.09748073 0.09735812
35
+ 0.09556631 0.09536911 0.09293538 0.09172186]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
36
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,17,835,"['ac', 'sale', 'shipping', 'email', 'looking', 'distribution', 'asking', 'usa', 'express', 'nntp']","[0.10743178 0.10743141 0.10255697 0.10167028 0.09907106 0.09891009
37
+ 0.09625679 0.09599011 0.09570176 0.09497967]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
38
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,18,800,"['nntp', 'posting', 'host', 'distribution', 'usa', 'sale', 'offer', 'interested', 'ca', 'ohio']","[0.13477376 0.13057682 0.12034003 0.10569087 0.10016396 0.08343355
39
+ 0.08232912 0.08194834 0.08051746 0.08022609]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
40
+ 1687106817,ctm,"{'num_epochs': 100, 'lr': 0.002, 'batch_size': 64}",20news,20,False,19,706,"['year', 'game', 'player', 'team', 'appears', 'season', 'hockey', 'league', 'play', 'st']","[0.12629244 0.11492042 0.1056929 0.10263036 0.09683751 0.09508488
41
+ 0.0916804 0.08949064 0.08876076 0.08860979]",20,20,1869.52,0.87,0.9912969946869549,0.11664103691784058,0.6771646204844821,0.9191184609366716
data/_20news_df_output_topic_word_LDA.csv ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_id,method,method_specific_params,dataset,num_given_topics,reduced,topic_num,topic_size,topic_words,word_scores,num_detected_topics,num_final_topics,duration_secs,diversity_unique,diversity_inv_rbo,coherence_npmi,coherence_v,rand_index
2
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,0,1983,"['people', 'would', 'think', 'writes', 'one', 'article', 'say', 'like', 'even', 'right']","[0.15944211 0.15224469 0.12663063 0.09782166 0.08654518 0.08315944
3
+ 0.08102202 0.07570259 0.06934862 0.06808303]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
4
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,1,1392,"['game', 'line', 'subject', 'organization', 'hockey', 'pittsburgh', 'jason', 'sale', 'bike', 'one']","[0.15864137 0.15104766 0.1412904 0.14096186 0.09204587 0.06773349
5
+ 0.06546046 0.06349892 0.06315827 0.05616174]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
6
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,2,1088,"['key', 'file', 'line', 'subject', 'organization', 'user', 'program', 'command', 'copy', 'system']","[0.24934617 0.14403053 0.09751268 0.07934412 0.07487413 0.07465485
7
+ 0.07421384 0.07272138 0.06740832 0.06589408]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
8
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,3,74,"['power', 'ray', 'supply', 'second', 'period', 'speed', 'physical', 'led', 'detroit', 'drive']","[0.36320662 0.11131906 0.0865567 0.07299744 0.07110922 0.06680124
9
+ 0.0601517 0.05910192 0.05901084 0.0497453 ]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
10
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,4,1463,"['chip', 'card', 'line', 'bus', 'driver', 'board', 'system', 'subject', 'bi', 'port']","[0.25674582 0.14665987 0.09656386 0.08028611 0.0772783 0.07018365
11
+ 0.07017458 0.0688495 0.06784467 0.06541362]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
12
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,5,139,"['mr', 'bd', 'line', 'organization', 'subject', 'la', 'georgia', 'jim', 'tv', 'bb']","[0.40496555 0.08192329 0.07686187 0.07221673 0.07185741 0.06734095
13
+ 0.06571998 0.06564764 0.04696385 0.0465027 ]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
14
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,6,633,"['god', 'one', 'jesus', 'christian', 'people', 'child', 'said', 'say', 'men', 'day']","[0.20740458 0.13620628 0.12195267 0.09815736 0.09672838 0.09088862
15
+ 0.07117718 0.06802055 0.05513892 0.05432544]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
16
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,7,1454,"['arab', 'would', 'right', 'clinton', 'get', 'police', 'one', 'know', 'war', 'russian']","[0.12601893 0.12368312 0.10284469 0.10216503 0.09605458 0.0910842
17
+ 0.09088893 0.0896497 0.08962572 0.08798514]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
18
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,8,969,"['system', 'year', 'time', 'problem', 'would', 'water', 'two', 'one', 'season', 'get']","[0.1390074 0.13774833 0.12779438 0.09163507 0.08726825 0.0860028
19
+ 0.08587282 0.08329464 0.08109743 0.08027877]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
20
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,9,1941,"['organization', 'line', 'subject', 'university', 'posting', 'host', 'nntp', 'edu', 'article', 'writes']","[0.13164079 0.12725668 0.12707195 0.11136677 0.10630848 0.09919478
21
+ 0.09862357 0.07071686 0.06533524 0.06248491]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
22
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,10,789,"['one', 'fbi', 'may', 'people', 'use', 'many', 'christian', 'question', 'also', 'evidence']","[0.17698498 0.12187716 0.10865493 0.09114419 0.08722291 0.0852498
23
+ 0.08284143 0.08273355 0.08256708 0.08072395]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
24
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,11,710,"['space', 'program', 'mission', 'system', 'page', 'file', 'available', 'package', 'information', 'also']","[0.1585192 0.14749347 0.09407386 0.08963598 0.08897515 0.08795281
25
+ 0.08662158 0.08649572 0.08510548 0.07512669]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
26
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,12,463,"['group', 'weapon', 'would', 'state', 'american', 'law', 'new', 'article', 'organization', 'day']","[0.11562134 0.11349647 0.10780852 0.10352351 0.1024464 0.09807958
27
+ 0.0907825 0.0898603 0.08921809 0.08916336]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
28
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,13,97,"['muslim', 'max', 'tm', 'pl', 'religion', 'islam', 'tin', 'newsreader', 'ex', 'version']","[0.20428702 0.1473811 0.09972491 0.09721643 0.09494188 0.09063093
29
+ 0.07441587 0.07242449 0.06325752 0.05571985]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
30
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,14,2438,"['line', 'subject', 'organization', 'posting', 'host', 'nntp', 'writes', 'article', 'problem', 'one']","[0.16525298 0.14868902 0.13808452 0.0916653 0.08841628 0.08717743
31
+ 0.08092585 0.07759399 0.06161902 0.06057569]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
32
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,15,470,"['government', 'gas', 'would', 'warrant', 'right', 'sex', 'line', 'encryption', 'subject', 'tank']","[0.19382758 0.13207996 0.11948839 0.10724095 0.08666767 0.08636794
33
+ 0.07251114 0.07062805 0.06617918 0.06500917]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
34
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,16,103,"['st', 'frank', 'man', 'ed', 'armed', 'survivor', 'new', 'line', 'subject', 'spirit']","[0.12606965 0.11788099 0.10643179 0.10502937 0.09843564 0.09841796
35
+ 0.09526637 0.09022573 0.08191948 0.08032307]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
36
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,17,788,"['subject', 'line', 'organization', 'god', 'writes', 'would', 'greek', 'article', 'jew', 'know']","[0.11895489 0.11156828 0.10866257 0.10448369 0.10438665 0.102501
37
+ 0.09522998 0.09161492 0.09004647 0.07255147]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
38
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,18,1158,"['writes', 'article', 'apr', 'line', 'brian', 'subject', 'organization', 'israel', 'insurance', 'good']","[0.18839596 0.17242719 0.09725275 0.08833881 0.08613019 0.08504379
39
+ 0.08398097 0.07830536 0.06050047 0.05962449]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
40
+ 1687099886,lda,{'alpha': 0.1},20news,20,False,19,694,"['line', 'organization', 'subject', 'university', 'host', 'posting', 'nntp', 'western', 'usa', 'ohio']","[0.11497776 0.11235808 0.1098053 0.10738717 0.09731661 0.0971822
41
+ 0.09494393 0.09386095 0.08933801 0.08282992]",20,20,199.09,0.635,0.9018807954953008,0.06993716674819705,0.5295167193987539,0.8930608444752256
data/_20news_df_output_topic_word_NMF.csv ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_id,method,method_specific_params,dataset,num_given_topics,reduced,topic_num,topic_size,topic_words,word_scores,num_detected_topics,num_final_topics,duration_secs,diversity_unique,diversity_inv_rbo,coherence_npmi,coherence_v,rand_index
2
+ 1687106317,nmf,{},20news,20,False,0,361,"['god', 'line', 'output', 'lord', 'christ', 'would', 'book', 'problem', 'subject', 'organization']","[0.34979106 0.12350973 0.0947608 0.09453389 0.07471375 0.05836609
3
+ 0.05477346 0.05082214 0.05022724 0.04850186]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
4
+ 1687106317,nmf,{},20news,20,False,1,1066,"['drive', 'system', 'disk', 'mac', 'hard', 'problem', 'get', 'scsi', 'also', 'card']","[0.19116262 0.15926939 0.12734865 0.11779287 0.08843095 0.07085378
5
+ 0.06267696 0.06198804 0.06154982 0.0589269 ]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
6
+ 1687106317,nmf,{},20news,20,False,2,44,"['graphic', 'file', 'pub', 'mail', 'ray', 'send', 'edu', 'object', 'archive', 'output']","[0.18261122 0.17998113 0.1156435 0.0977497 0.09352912 0.08524382
7
+ 0.0849599 0.05835556 0.0533657 0.04856034]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
8
+ 1687106317,nmf,{},20news,20,False,3,34,"['max', 'pl', 'tm', 'mb', 'mr', 'mi', 'ex', 'ah', 'au', 'de']","[0.81594648 0.06553909 0.04312668 0.02029965 0.01533819 0.01430676
9
+ 0.01005241 0.006095 0.00528083 0.0040149 ]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
10
+ 1687106317,nmf,{},20news,20,False,4,1270,"['one', 'use', 'get', 'two', 'see', 'also', 'new', 'time', 'first', 'year']","[0.23971018 0.09956428 0.09932129 0.09751522 0.08508889 0.08494885
11
+ 0.07695754 0.07451225 0.07415631 0.06822519]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
12
+ 1687106317,nmf,{},20news,20,False,5,53,"['image', 'gif', 'color', 'format', 'quality', 'software', 'pixel', 'free', 'tool', 'version']","[0.45527478 0.1064173 0.09178239 0.08095739 0.05727959 0.04782921
13
+ 0.04337543 0.04194787 0.03787981 0.03725623]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
14
+ 1687106317,nmf,{},20news,20,False,6,49,"['available', 'ftp', 'edu', 'pub', 'version', 'format', 'graphic', 'package', 'source', 'contact']","[0.16663209 0.14746206 0.14448388 0.10769796 0.10278298 0.08357541
15
+ 0.0707831 0.06603169 0.05688584 0.053665 ]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
16
+ 1687106317,nmf,{},20news,20,False,7,62,"['system', 'data', 'drive', 'disk', 'support', 'feature', 'software', 'space', 'user', 'rom']","[0.20376969 0.1854125 0.11060066 0.09022935 0.08683401 0.08307478
17
+ 0.0710087 0.05707783 0.05616885 0.05582363]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
18
+ 1687106317,nmf,{},20news,20,False,8,134,"['data', 'image', 'available', 'also', 'information', 'sun', 'based', 'set', 'tool', 'motif']","[0.17411892 0.16602923 0.09961587 0.09704964 0.08925781 0.08008402
19
+ 0.07975093 0.07610741 0.07116763 0.06681855]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
20
+ 1687106317,nmf,{},20news,20,False,9,4899,"['would', 'know', 'writes', 'article', 'think', 'line', 'organization', 'subject', 'year', 'like']","[0.14483345 0.11344227 0.11183915 0.11130367 0.10084824 0.09255977
21
+ 0.0924911 0.08888184 0.07933589 0.06446461]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
22
+ 1687106317,nmf,{},20news,20,False,10,303,"['state', 'law', 'use', 'information', 'question', 'new', 'american', 'must', 'example', 'may']","[0.13702646 0.11159397 0.11068197 0.10648077 0.1030514 0.1015426
23
+ 0.08623879 0.08187689 0.08137951 0.08012766]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
24
+ 1687106317,nmf,{},20news,20,False,11,209,"['jesus', 'said', 'people', 'say', 'know', 'one', 'armenian', 'day', 'lord', 'come']","[0.15556769 0.15193523 0.13980515 0.12600016 0.09142169 0.07601694
25
+ 0.07501137 0.06542825 0.06164289 0.05717063]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
26
+ 1687106317,nmf,{},20news,20,False,12,128,"['file', 'image', 'program', 'gif', 'format', 'gun', 'color', 'output', 'section', 'may']","[0.40080525 0.11953442 0.10542488 0.0751564 0.0649226 0.05394245
27
+ 0.05294286 0.04787489 0.04281177 0.03658447]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
28
+ 1687106317,nmf,{},20news,20,False,13,7652,"['line', 'organization', 'subject', 'writes', 'article', 'host', 'nntp', 'people', 'world', 'university']","[0.15852053 0.14480727 0.14317839 0.11433758 0.10727352 0.0804632
29
+ 0.07618727 0.06295255 0.0575845 0.0546952 ]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
30
+ 1687106317,nmf,{},20news,20,False,14,90,"['god', 'atheist', 'atheism', 'religion', 'religious', 'christian', 'belief', 'many', 'believe', 'one']","[0.22380489 0.18513454 0.09868232 0.09665265 0.07770896 0.07635449
31
+ 0.07468084 0.06507333 0.06193238 0.03997562]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
32
+ 1687106317,nmf,{},20news,20,False,15,219,"['mail', 'computer', 'internet', 'server', 'anonymous', 'posting', 'system', 'user', 'privacy', 'information']","[0.12141562 0.10778277 0.10736879 0.10597809 0.10154666 0.09926864
33
+ 0.09909393 0.09225525 0.08398684 0.0813034 ]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
34
+ 1687106317,nmf,{},20news,20,False,16,37,"['pl', 'md', 'sc', 'ah', 'id', 'wa', 'mr', 'st', 'gif', 'tm']","[0.17065527 0.13395293 0.10855263 0.10596626 0.09627617 0.09583026
35
+ 0.09455009 0.07227479 0.06647285 0.05546874]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
36
+ 1687106317,nmf,{},20news,20,False,17,749,"['do', 'window', 'file', 'microsoft', 'program', 'mouse', 'output', 'font', 'version', 'server']","[0.35849979 0.21097385 0.07707617 0.06976072 0.05095114 0.04960919
37
+ 0.04793472 0.0451268 0.04512359 0.04494403]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
38
+ 1687106317,nmf,{},20news,20,False,18,319,"['bit', 'use', 'color', 'window', 'driver', 'mac', 'program', 'version', 'scsi', 'hardware']","[0.2273714 0.12952355 0.10517634 0.09524074 0.08628512 0.0836056
39
+ 0.08155865 0.06488489 0.06451468 0.06183903]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
40
+ 1687106317,nmf,{},20news,20,False,19,1168,"['people', 'would', 'one', 'think', 'make', 'president', 'know', 'mr', 'well', 'even']","[0.17643617 0.14980483 0.14105327 0.10093348 0.07604843 0.07378741
41
+ 0.07171568 0.07168691 0.06959962 0.06893418]",20,20,367.93,0.645,0.9608968291743609,0.10370885185486564,0.6470614670550731,0.7350220371300955
data/_20news_df_output_topic_word_Top2Vec.csv ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_id,method,method_specific_params,dataset,num_given_topics,reduced,topic_num,topic_size,topic_words,word_scores,num_detected_topics,num_final_topics,duration_secs
2
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,0,1616,"['bible' 'god' 'jesus' 'religion' 'scripture' 'christianity' 'christ'
3
+ 'atheist' 'prophet' 'christian' 'religious' 'church' 'muslim' 'theist'
4
+ 'qur' 'verse' 'doctrine' 'gospel' 'belief' 'holy' 'testament' 'psalm'
5
+ 'islam' 'apostle' 'faith' 'believer' 'theology' 'clh' 'heresy' 'quran'
6
+ 'congregation' 'atheism' 'interpretation' 'disciple' 'revelation'
7
+ 'catholic' 'armenian' 'salvation' 'truth' 'corinthian' 'khomeini'
8
+ 'baptism' 'sin' 'persecution' 'teaching' 'turkish' 'passage' 'hebrew'
9
+ 'biblical' 'isaiah']","[0.44676337 0.44158506 0.4414148 0.4250672 0.42309386 0.41805178
10
+ 0.4160752 0.4154315 0.41115087 0.4075917 0.3984454 0.3958729
11
+ 0.3906172 0.39005995 0.38813427 0.38569444 0.38469318 0.37988603
12
+ 0.37768558 0.37669003 0.374072 0.37036234 0.36953464 0.36884975
13
+ 0.36864585 0.3683077 0.36524647 0.3640772 0.3627922 0.35627204
14
+ 0.35473567 0.3525921 0.35250074 0.350264 0.34985965 0.34982705
15
+ 0.34791228 0.34752375 0.34076905 0.34058702 0.3389619 0.33720076
16
+ 0.33691663 0.33592725 0.33553275 0.3345945 0.33261788 0.3306961
17
+ 0.3291487 0.3282006 ]",236,20,1337.527
18
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,1,1433,"['window' 'olwm' 'xterm' 'openwindows' 'xdefaults' 'pointer' 'menu'
19
+ 'intrinsics' 'client' 'xdm' 'widget' 'openwin' 'bitmap' 'colormap' 'xlib'
20
+ 'ncd' 'screen' 'file' 'server' 'icon' 'manager' 'display' 'application'
21
+ 'directory' 'button' 'config' 'default' 'ini' 'pixmap' 'openlook' 'font'
22
+ 'sunos' 'paste' 'exit' 'sparc' 'mwm' 'console' 'xaw' 'ctrl' 'twm' 'xpert'
23
+ 'ndet' 'running' 'program' 'args' 'mouse' 'fprintf' 'motif' 'contrib'
24
+ 'exe']","[0.4715806 0.40489066 0.38360178 0.38229018 0.38183886 0.36847085
25
+ 0.36512193 0.36083105 0.35867202 0.35434687 0.3492447 0.34910566
26
+ 0.3489738 0.3475267 0.34732035 0.34555644 0.3450095 0.34349802
27
+ 0.3393754 0.33892933 0.33685613 0.3360705 0.3314531 0.33126542
28
+ 0.32636884 0.32313222 0.31982037 0.31860113 0.31551248 0.3133896
29
+ 0.30953708 0.30911303 0.30873692 0.30854362 0.3078321 0.30708423
30
+ 0.30538025 0.3053298 0.303819 0.30295336 0.30166018 0.30153474
31
+ 0.30145884 0.3011434 0.29960972 0.29814076 0.2973088 0.29728878
32
+ 0.29679278 0.2967768 ]",236,20,1337.527
33
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,2,1352,"['apple' 'sale' 'shipping' 'powerbook' 'coprocessor' 'lciii' 'price'
34
+ 'simm' 'brand' 'offer' 'iisi' 'fpu' 'iici' 'centris' 'nubus' 'mac'
35
+ 'forsale' 'slot' 'vram' 'printer' 'simms' 'selling' 'quadra' 'bought'
36
+ 'sell' 'upgrade' 'monitor' 'adapter' 'cartridge' 'duo' 'external'
37
+ 'purchased' 'lc' 'plus' 'asking' 'buying' 'sony' 'portable' 'interested'
38
+ 'buyer' 'dpi' 'ethernet' 'obo' 'cod' 'ram' 'deskjet' 'laserjet' 'modem'
39
+ 'mhz' 'hanover']","[0.42084357 0.3996125 0.3828575 0.3738153 0.35114548 0.3508577
40
+ 0.35029182 0.34458438 0.34436062 0.34023166 0.3397068 0.33833215
41
+ 0.33543992 0.33508453 0.3297907 0.3284306 0.3256174 0.32163784
42
+ 0.31535777 0.313064 0.31085122 0.3099688 0.3077851 0.30697435
43
+ 0.30588296 0.30419356 0.3019396 0.2984153 0.29826868 0.29718032
44
+ 0.29222956 0.28986603 0.28650752 0.28592142 0.28544748 0.2840621
45
+ 0.28394428 0.28078222 0.2798127 0.27918035 0.27807266 0.27502447
46
+ 0.27429968 0.27302492 0.27251047 0.27248186 0.27171037 0.2713627
47
+ 0.27132747 0.27121824]",236,20,1337.527
48
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,3,1339,"['gun' 'firearm' 'criminal' 'handgun' 'crime' 'nra' 'constitutional'
49
+ 'democrat' 'homicide' 'politician' 'abiding' 'cramer' 'clayton' 'brady'
50
+ 'liberal' 'homosexual' 'amendment' 'rkba' 'federal' 'voter' 'clinton'
51
+ 'constitution' 'economic' 'social' 'shotgun' 'ban' 'government' 'tax'
52
+ 'optilink' 'veal' 'gay' 'liberty' 'abuse' 'citizen' 'atheist' 'legal'
53
+ 'self' 'violent' 'minority' 'petaluma' 'privacy' 'drug' 'majority'
54
+ 'cipriani' 'enforce' 'election' 'freedom' 'heterosexual' 'jail' 'health']","[0.38347796 0.3787889 0.3679612 0.36430764 0.3554895 0.3511525
55
+ 0.34857452 0.34014687 0.33959094 0.3372019 0.33454198 0.32582545
56
+ 0.31983513 0.3168776 0.31487548 0.31218153 0.31092322 0.3103757
57
+ 0.30537516 0.30387998 0.30337164 0.30098853 0.298684 0.2978992
58
+ 0.29771733 0.29765505 0.29524988 0.29269737 0.2911522 0.28939897
59
+ 0.28611898 0.28418207 0.27660564 0.2763301 0.27374923 0.272509
60
+ 0.2718676 0.27084577 0.26999676 0.26828033 0.26778233 0.26767984
61
+ 0.26732287 0.2665463 0.2658036 0.26460105 0.26452196 0.26405072
62
+ 0.26368138 0.26102233]",236,20,1337.527
63
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,4,1119,"['playoff' 'hockey' 'bruin' 'nhl' 'goalie' 'bure' 'goaltender' 'mogilny'
64
+ 'blackhawks' 'islander' 'lafontaine' 'potvin' 'canadiens' 'lemieux'
65
+ 'powerplay' 'quebec' 'gilmour' 'canuck' 'penguin' 'whaler' 'selanne'
66
+ 'defenseman' 'puck' 'shanahan' 'overtime' 'yzerman' 'dchhabra' 'fuhr'
67
+ 'habs' 'recchi' 'barrasso' 'gretzky' 'cup' 'stanley' 'domi' 'soderstrom'
68
+ 'adirondack' 'oiler' 'sabre' 'probert' 'chelios' 'messier' 'rauser'
69
+ 'flyer' 'isle' 'skate' 'edmonton' 'tournament' 'dineen' 'devil']","[0.46590942 0.44713598 0.4239912 0.42257857 0.4217516 0.4191884
70
+ 0.4092419 0.40255633 0.39616874 0.3945594 0.38332573 0.38262597
71
+ 0.3762384 0.37487462 0.3743045 0.37386978 0.373065 0.36891234
72
+ 0.36809236 0.36592788 0.36473086 0.36453372 0.3636976 0.359594
73
+ 0.35779458 0.3555419 0.35410357 0.35251305 0.3486529 0.34727
74
+ 0.3464234 0.34516758 0.34137625 0.34075826 0.33696085 0.33651152
75
+ 0.33625218 0.33311346 0.3329189 0.33219948 0.32794854 0.32602125
76
+ 0.32478017 0.3216294 0.32005927 0.3147339 0.31425625 0.31333056
77
+ 0.3040147 0.30185115]",236,20,1337.527
78
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,5,1073,"['championship' 'mail' 'hockey' 'email' 'sweden' 'nhl' 'canada' 'please'
79
+ 'hello' 'reply' 'internet' 'motorcycle' 'hi' 'staffan' 'space'
80
+ 'tournament' 'se' 'subject' 'appreciate' 'finland' 'po' 'sci' 'qw' 'sale'
81
+ 'playoff' 'thanks' 'university' 'appreciated' 'posting' 'carnegie' 'xj'
82
+ 'mailing' 'fax' 'send' 'honda' 'engineering' 'computer' 'news' 'cmu'
83
+ 'moa' 'computing' 'thanx' 'yr' 'usa' 'advance' 'hv' 'netnews' 'post' 'ua'
84
+ 'phone']","[0.3630246 0.34161925 0.3414568 0.32976812 0.3128215 0.30187768
85
+ 0.2970038 0.29541144 0.29449564 0.29026005 0.2864499 0.2802989
86
+ 0.28006414 0.27813578 0.27792534 0.27719298 0.2759723 0.27250564
87
+ 0.27171326 0.27121776 0.27089795 0.27072582 0.27053788 0.26927954
88
+ 0.26886922 0.26859212 0.26837417 0.2671209 0.26608652 0.2652242
89
+ 0.26476553 0.26268485 0.26216033 0.26212424 0.25979197 0.259427
90
+ 0.25906652 0.25888425 0.25841695 0.25806126 0.25669473 0.25627166
91
+ 0.25586653 0.2556815 0.25503826 0.25497687 0.25455207 0.2544282
92
+ 0.25436395 0.25297272]",236,20,1337.527
93
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,6,917,"['amp' 'circuit' 'voltage' 'capacitor' 'amplifier' 'transistor' 'signal'
94
+ 'receiver' 'ohm' 'transmitter' 'frequency' 'radio' 'antenna' 'shack'
95
+ 'transformer' 'volt' 'tube' 'audio' 'stereo' 'khz' 'metal' 'resistor'
96
+ 'watt' 'pulse' 'grounding' 'meter' 'electronics' 'light' 'detector'
97
+ 'analog' 'filter' 'connector' 'satellite' 'orbit' 'relay' 'conductor'
98
+ 'noise' 'dtmedin' 'ttl' 'wiring' 'gfci' 'oort' 'tv' 'vcr' 'radar' 'lamp'
99
+ 'diameter' 'orbiting' 'switch' 'sensor']","[0.47413015 0.4708779 0.45542216 0.3937438 0.39038676 0.3839948
100
+ 0.38005072 0.37083778 0.36485273 0.35683596 0.35484332 0.34855273
101
+ 0.34549275 0.34522355 0.3375527 0.329933 0.32977253 0.32121888
102
+ 0.31858146 0.31828302 0.31616575 0.31540495 0.3126575 0.311877
103
+ 0.3112969 0.31016406 0.3101191 0.30984265 0.3096066 0.30437598
104
+ 0.30111343 0.29882246 0.2985236 0.29597062 0.2935729 0.29238188
105
+ 0.29194543 0.291157 0.28724945 0.28360963 0.28211528 0.28207964
106
+ 0.28027534 0.27832478 0.2776957 0.27767122 0.27725393 0.2759151
107
+ 0.27582926 0.27270424]",236,20,1337.527
108
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,7,907,"['ide' 'controller' 'vlb' 'scsi' 'motherboard' 'bus' 'drive' 'eisa' 'isa'
109
+ 'svga' 'vram' 'maxtor' 'esdi' 'card' 'dma' 'vesa' 'floppy' 'mhz'
110
+ 'seagate' 'dram' 'meg' 'mfm' 'hdd' 'adapter' 'bios' 'chipset' 'penev'
111
+ 'ati' 'simm' 'slot' 'ram' 'mb' 'jumper' 'simms' 'orchid' 'board'
112
+ 'adaptec' 'nubus' 'dx' 'monitor' 'ultra' 'vga' 'cache' 'hard' 'apple'
113
+ 'benchmark' 'hd' 'trident' 'toshiba' 'brand']","[0.51592463 0.45202082 0.43783426 0.43552488 0.42594004 0.4070964
114
+ 0.3885573 0.38779846 0.38645172 0.3809911 0.3721417 0.369662
115
+ 0.3677196 0.36494362 0.35803476 0.3537441 0.3507664 0.3488544
116
+ 0.34600908 0.3442523 0.3434435 0.34212285 0.33911508 0.32981914
117
+ 0.32907662 0.32285517 0.3209666 0.3203686 0.3188119 0.31582078
118
+ 0.3149792 0.30925786 0.30752766 0.30566144 0.3021134 0.30166447
119
+ 0.2938785 0.29251978 0.29103965 0.28887713 0.2865978 0.2865807
120
+ 0.285491 0.28390944 0.28378698 0.2835799 0.28333062 0.28302824
121
+ 0.28207874 0.28105193]",236,20,1337.527
122
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,8,863,"['pitching' 'goaltender' 'season' 'defenseman' 'pitcher' 'inning'
123
+ 'sandberg' 'obp' 'playoff' 'rbi' 'scored' 'goalie' 'player' 'team'
124
+ 'batter' 'batting' 'bullpen' 'nhl' 'whaler' 'alomar' 'shutout' 'rookie'
125
+ 'recchi' 'gretzky' 'baerga' 'fielder' 'soderstrom' 'mattingly'
126
+ 'blackhawks' 'hitter' 'game' 'dodger' 'puck' 'oriole' 'snichols' 'hockey'
127
+ 'sox' 'tampa' 'winfield' 'potvin' 'talent' 'sherri' 'clemens' 'sanderson'
128
+ 'score' 'yzerman' 'career' 'pitched' 'gilmour' 'winning']","[0.38014483 0.3789016 0.37256125 0.36911368 0.353766 0.3475885
129
+ 0.3414901 0.33900553 0.33893228 0.33813792 0.33412263 0.33359426
130
+ 0.33340466 0.32941717 0.3285423 0.32582566 0.32435375 0.32421935
131
+ 0.3235423 0.32162356 0.3206997 0.31891322 0.3174274 0.31570834
132
+ 0.3127724 0.31258675 0.30754888 0.30717647 0.30560625 0.30555865
133
+ 0.30293164 0.30011946 0.29883778 0.295937 0.29524797 0.29420918
134
+ 0.2931141 0.29203683 0.2913671 0.29120994 0.29103744 0.2905165
135
+ 0.28822783 0.28763106 0.28509793 0.28462318 0.283857 0.28375292
136
+ 0.28373525 0.28345263]",236,20,1337.527
137
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,9,860,"['atheist' 'morality' 'theist' 'livesey' 'dwyer' 'atheism' 'bobbe'
138
+ 'mozumder' 'beauchaine' 'solntze' 'moral' 'schneider' 'kmr' 'bil' 'wpd'
139
+ 'okcforum' 'benedikt' 'jaeger' 'allan' 'agnostic' 'cookamunga' 'motto'
140
+ 'islamic' 'theism' 'religion' 'keith' 'rosenau' 'evolution' 'mathew'
141
+ 'rushdie' 'conner' 'objective' 'darice' 'rusnews' 'gregg' 'khomeini'
142
+ 'punisher' 'sandvik' 'objectively' 'halat' 'exist' 'specie' 'immoral'
143
+ 'healta' 'quran' 'mantis' 'subjective' 'believer' 'belief' 'islam']","[0.50602657 0.44531634 0.43936396 0.42137936 0.41620952 0.40997714
144
+ 0.4004252 0.39174497 0.39160943 0.38147688 0.37546414 0.36531267
145
+ 0.3594244 0.3445725 0.3362559 0.33301878 0.32759196 0.3252498
146
+ 0.31651446 0.3143277 0.31357464 0.30626386 0.3033719 0.30169833
147
+ 0.30109283 0.30084568 0.2945159 0.29261556 0.29168844 0.29025084
148
+ 0.28797036 0.28596056 0.28305757 0.28301924 0.28264028 0.28140482
149
+ 0.27835423 0.27604178 0.27072585 0.2643424 0.25929388 0.25921416
150
+ 0.256099 0.25507405 0.25278842 0.25175795 0.25021374 0.24655274
151
+ 0.24615248 0.24575195]",236,20,1337.527
152
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,10,832,"['membership' 'conference' 'campus' 'siggraph' 'registration' 'hotel'
153
+ 'tour' 'proceeding' 'information' 'acm' 'service' 'mail' 'cryptology'
154
+ 'dinner' 'june' 'student' 'suite' 'electronic' 'participant' 'attend'
155
+ 'july' 'thursday' 'address' 'avenue' 'interested' 'space' 'project'
156
+ 'ticket' 'publication' 'tutorial' 'bethesda' 'monday' 'sponsor' 'hill'
157
+ 'payment' 'offer' 'airport' 'director' 'meeting' 'bulletin' 'weekend'
158
+ 'participation' 'letter' 'association' 'county' 'fee' 'email'
159
+ 'newsletter' 'motorcycle' 'schedule']","[0.37641203 0.37549758 0.3680211 0.3671983 0.3637733 0.34958172
160
+ 0.3426261 0.3379334 0.32778293 0.32679507 0.3247376 0.3163039
161
+ 0.31118208 0.3098779 0.30920613 0.30706063 0.30275297 0.30152684
162
+ 0.30137172 0.30098063 0.29588354 0.2929973 0.29049584 0.29006344
163
+ 0.28908098 0.28858015 0.28830767 0.28723055 0.28255087 0.28237736
164
+ 0.2820445 0.28091612 0.27746138 0.2772538 0.27671865 0.2765438
165
+ 0.2757618 0.27530587 0.2751091 0.27469456 0.27454117 0.27440792
166
+ 0.2739563 0.27242443 0.2705129 0.26944393 0.26797917 0.26713628
167
+ 0.26711327 0.26683828]",236,20,1337.527
168
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,11,776,"['fbi' 'atf' 'davidians' 'koresh' 'compound' 'batf' 'waco' 'bd' 'fire'
169
+ 'ranch' 'survivor' 'dividian' 'raid' 'grenade' 'tear' 'warrant' 'cnn'
170
+ 'reno' 'tank' 'armored' 'cult' 'burn' 'agent' 'suicide' 'knock' 'tavares'
171
+ 'follower' 'jmd' 'stove' 'investigation' 'cdt' 'paranoid' 'janet' 'fed'
172
+ 'burned' 'inside' 'knocking' 'branch' 'arras' 'assault' 'burning' 'gas'
173
+ 'started' 'hostage' 'roby' 'building' 'stratus' 'flee' 'happened'
174
+ 'davidian']","[0.5663208 0.5527084 0.54346037 0.52827954 0.5163899 0.5084876
175
+ 0.49466527 0.48884875 0.4885131 0.45129156 0.44314212 0.42619407
176
+ 0.42141107 0.41937637 0.3949927 0.39384326 0.37510183 0.37389514
177
+ 0.37186143 0.3610953 0.35808873 0.3538611 0.34679633 0.34508017
178
+ 0.3363666 0.3325144 0.33024508 0.3277991 0.32340246 0.32110196
179
+ 0.32067192 0.3200016 0.31780922 0.3147901 0.31345642 0.31323874
180
+ 0.3103191 0.31015503 0.30978537 0.3096369 0.30926692 0.3066926
181
+ 0.30661058 0.29615265 0.2940686 0.29287642 0.29033172 0.2900957
182
+ 0.28810024 0.28578383]",236,20,1337.527
183
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,12,772,"['riding' 'egreen' 'motorcycle' 'dod' 'countersteering' 'biker' 'bikers'
184
+ 'bike' 'squid' 'ride' 'helmet' 'cousineau' 'drinking' 'cookson' 'rider'
185
+ 'infante' 'npet' 'karr' 'pettefar' 'jacket' 'mjs' 'blaine' 'puck'
186
+ 'newbie' 'bgardner' 'pillion' 'playoff' 'skate' 'coach' 'lean' 'levine'
187
+ 'balcony' 'drunk' 'boom' 'hockey' 'mamma' 'infield' 'jody' 'harley'
188
+ 'keeper' 'sorenson' 'sabre' 'nhl' 'azerbaijani' 'ranck' 'speedy'
189
+ 'cruiser' 'wheelies' 'lyuda' 'ama']","[0.41461077 0.39282525 0.37715077 0.36484554 0.35063148 0.33479384
190
+ 0.33462274 0.33298865 0.3319907 0.32893145 0.32492083 0.3105444
191
+ 0.28028214 0.27274084 0.27210125 0.26896492 0.26730326 0.26370856
192
+ 0.2634203 0.26052043 0.25942084 0.2590174 0.2586859 0.2586578
193
+ 0.2524207 0.2488463 0.24796811 0.24651901 0.24457517 0.23809463
194
+ 0.23794222 0.23766775 0.2366336 0.23643237 0.23621374 0.23604974
195
+ 0.23476706 0.23243055 0.23103555 0.23098774 0.22998813 0.22980058
196
+ 0.22714046 0.22572297 0.22559911 0.22516048 0.22514091 0.22425602
197
+ 0.22397438 0.22318973]",236,20,1337.527
198
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,13,748,"['graphical' 'programming' 'borland' 'gui' 'application' 'motif'
199
+ 'framework' 'toolkit' 'api' 'builder' 'window' 'interactive' 'interface'
200
+ 'package' 'library' 'polygon' 'processing' 'tool' 'programmer' 'oriented'
201
+ 'graphic' 'toolkits' 'microsoft' 'openlook' 'functionality' 'turbo'
202
+ 'graph' 'modeling' 'routine' 'compiler' 'widget' 'intrinsics' 'drawing'
203
+ 'osf' 'architecture' 'platform' 'pc' 'networking' 'apps' 'operating'
204
+ 'fortran' 'looking' 'vertex' 'spline' 'phigs' 'manual' 'unix' 'feature'
205
+ 'simulation' 'workstation']","[0.4384119 0.3882578 0.38631064 0.38347492 0.38143724 0.37813997
206
+ 0.36548048 0.35531527 0.35095713 0.3509377 0.34626365 0.3449847
207
+ 0.34453845 0.34367275 0.3410035 0.34039888 0.33888048 0.32572466
208
+ 0.3247982 0.32260007 0.31918436 0.31766117 0.31704426 0.31185216
209
+ 0.3096385 0.30828592 0.3052628 0.30463484 0.30449712 0.30253798
210
+ 0.29837552 0.29726636 0.29659134 0.29610744 0.2947264 0.29455125
211
+ 0.29407462 0.29384118 0.29348552 0.29250127 0.29200053 0.29100662
212
+ 0.29074687 0.29074043 0.2904779 0.2890798 0.28778 0.28479168
213
+ 0.28445467 0.28393763]",236,20,1337.527
214
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,14,747,"['clipper' 'escrow' 'encryption' 'scheme' 'escrowed' 'nsa' 'chip' 'crypto'
215
+ 'secure' 'key' 'algorithm' 'encrypted' 'sternlight' 'hellman' 'strnlght'
216
+ 'wiretap' 'classified' 'decrypt' 'strnlghtc' 'excepted' 'pmetzger'
217
+ 'denning' 'secret' 'encrypt' 'plaintext' 'metzger' 'enforcement' 'dsi'
218
+ 'dorothy' 'fed' 'omission' 'tapped' 'laissez' 'decryption' 'tap' 'device'
219
+ 'security' 'ensure' 'ciphertext' 'toal' 'serial' 'qualcomm'
220
+ 'cryptography' 'cryptosystem' 'approval' 'trust' 'agency' 'spook'
221
+ 'communication' 'gtoal']","[0.53622144 0.51438415 0.47407508 0.45463344 0.4485225 0.44310796
222
+ 0.44174838 0.44148 0.44016623 0.43232006 0.42186934 0.41818953
223
+ 0.40332472 0.39804906 0.39571035 0.38707638 0.38706988 0.38338387
224
+ 0.3833515 0.3805896 0.3804127 0.3762514 0.37518197 0.36292812
225
+ 0.3523016 0.3512108 0.3474918 0.34463465 0.3440556 0.34328783
226
+ 0.3383907 0.3301744 0.3297775 0.32963568 0.32737336 0.3259146
227
+ 0.3229884 0.31992412 0.3190465 0.31714386 0.31693232 0.3151013
228
+ 0.31495202 0.31394005 0.3099162 0.30657524 0.3045889 0.30456156
229
+ 0.30431086 0.3039512 ]",236,20,1337.527
230
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,15,737,"['orbit' 'prb' 'shuttle' 'spacecraft' 'mission' 'satellite' 'nsmca'
231
+ 'solar' 'henry' 'launch' 'payload' 'landing' 'flyby' 'zoology' 'orbital'
232
+ 'orbiter' 'manned' 'utzoo' 'ssto' 'galileo' 'lunar' 'nasa' 'pluto'
233
+ 'space' 'servicing' 'digex' 'moon' 'mccall' 'propulsion' 'astronaut'
234
+ 'orbiting' 'flight' 'spencer' 'baalke' 'jsc' 'comet' 'trajectory' 'venus'
235
+ 'funding' 'ssf' 'titan' 'telescope' 'aircraft' 'earth' 'asteroid'
236
+ 'kipling' 'jbh' 'design' 'shafer' 'magellan']","[0.47373086 0.46202362 0.45533887 0.43171287 0.412343 0.410465
237
+ 0.40169445 0.4012512 0.39061272 0.38869444 0.38783133 0.3872778
238
+ 0.3864897 0.38422304 0.37999982 0.37800846 0.37716657 0.37500364
239
+ 0.3725879 0.3695118 0.36791682 0.3657704 0.36366093 0.36269274
240
+ 0.35963058 0.3475752 0.34491754 0.34488463 0.34403253 0.34377724
241
+ 0.3408779 0.33478466 0.333887 0.32768953 0.3273967 0.32607034
242
+ 0.32421684 0.3228647 0.3126804 0.31172872 0.3100717 0.3100507
243
+ 0.31001002 0.30487514 0.30258626 0.3010378 0.29808497 0.29804218
244
+ 0.29652447 0.2953205 ]",236,20,1337.527
245
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,16,697,"['toyota' 'brake' 'sedan' 'car' 'civic' 'torque' 'ford' 'shifter' 'wagon'
246
+ 'engine' 'chevy' 'mustang' 'nissan' 'cruiser' 'rear' 'tire' 'wheel'
247
+ 'integra' 'jeep' 'liter' 'honda' 'taurus' 'suspension' 'gear' 'dealer'
248
+ 'mazda' 'clutch' 'sho' 'accord' 'steering' 'braking' 'rotor' 'opel'
249
+ 'eliot' 'vehicle' 'motorcycle' 'motor' 'shifting' 'camaro' 'convertible'
250
+ 'dodge' 'bike' 'valve' 'porsche' 'nhl' 'mpg' 'seat' 'saturn' 'mile'
251
+ 'highway']","[0.39571 0.37745798 0.37729716 0.36061507 0.360043 0.35976624
252
+ 0.35714692 0.35585535 0.355524 0.35510963 0.35412773 0.34132427
253
+ 0.34061238 0.33668256 0.3362129 0.3349996 0.33462238 0.32848147
254
+ 0.32835317 0.32521552 0.32297438 0.31911826 0.31077892 0.3047754
255
+ 0.30159217 0.2923528 0.29225302 0.29082403 0.2903491 0.29028058
256
+ 0.28864217 0.2883463 0.2877421 0.28653866 0.28255364 0.28221753
257
+ 0.27619636 0.27617338 0.2761493 0.27520862 0.27492434 0.27293944
258
+ 0.27141443 0.26931706 0.2665119 0.26613706 0.26508605 0.26388812
259
+ 0.2630334 0.26206416]",236,20,1337.527
260
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,17,694,"['temperature' 'heat' 'oil' 'drain' 'cooling' 'fuel' 'bike' 'battery'
261
+ 'thermal' 'water' 'dry' 'liquid' 'gas' 'spray' 'behanna' 'steam'
262
+ 'exhaust' 'pump' 'cleaning' 'discharge' 'solvent' 'wood' 'bolt' 'dirt'
263
+ 'oxygen' 'cylinder' 'cold' 'valve' 'atmosphere' 'diesel' 'bottle' 'wax'
264
+ 'surface' 'motorcycle' 'warm' 'removing' 'metal' 'cement' 'tower'
265
+ 'concrete' 'tire' 'emission' 'cool' 'hot' 'tear' 'diameter' 'electricity'
266
+ 'boom' 'air' 'vacuum']","[0.38994113 0.37223655 0.3659422 0.36278605 0.35352123 0.35129756
267
+ 0.3472059 0.34584868 0.3258671 0.32109696 0.3128299 0.3064231
268
+ 0.3031057 0.30245066 0.30225492 0.29960772 0.29874486 0.29400587
269
+ 0.28933424 0.28888935 0.288783 0.28770658 0.28001165 0.27997652
270
+ 0.2772094 0.27563 0.27516425 0.27506196 0.27488017 0.2657459
271
+ 0.26499617 0.26479498 0.26456675 0.264168 0.26409522 0.26169625
272
+ 0.2612601 0.25858256 0.25794652 0.25648054 0.25529054 0.2537981
273
+ 0.2520281 0.2506379 0.25042093 0.24887553 0.2476764 0.24741241
274
+ 0.24695235 0.2457898 ]",236,20,1337.527
275
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,18,686,"['israeli' 'israel' 'arab' 'palestinian' 'livni' 'gaza' 'palestine'
276
+ 'zionist' 'andi' 'tclock' 'beyer' 'jake' 'occupied' 'shostack' 'hamas'
277
+ 'kaufman' 'davidsson' 'jew' 'terrorism' 'jerusalem' 'gazans' 'lebanese'
278
+ 'cpr' 'in' 'hernlem' 'ana' 'hezbollah' 'cwru' 'shai' 'hela' 'propaganda'
279
+ 'oac' 'lebanon' 'reserve' 'redundancy' 'amos' 'syrian' 'syria' 'ghetto'
280
+ 'guday' 'jewish' 'irgun' 'peace' 'jordan' 'elia' 'territory' 'plo' 'iraq'
281
+ 'occupation' 'orion']","[0.5798781 0.4864083 0.47888124 0.4376715 0.4304331 0.4260963
282
+ 0.42182434 0.41124582 0.40849674 0.4030689 0.40131482 0.3953199
283
+ 0.39427945 0.39251405 0.3896283 0.38426933 0.37792894 0.37311408
284
+ 0.37026602 0.3690432 0.36581022 0.36551306 0.36441153 0.36347967
285
+ 0.3634483 0.3600469 0.35887873 0.35557127 0.35184813 0.3517988
286
+ 0.34519568 0.34395516 0.3436267 0.34030044 0.33586282 0.33550203
287
+ 0.32902315 0.32690272 0.32504597 0.31950957 0.3143857 0.31242925
288
+ 0.30857286 0.30692795 0.30610973 0.30222043 0.30131567 0.29541656
289
+ 0.29520452 0.2933391 ]",236,20,1337.527
290
+ 1687099717,top2vec,"{'doc2vec_speed': 'learn', 'min_count': 50, 'embedding_model': 'doc2vec', 'umap_args': {'n_neighbors': 15, 'n_components': 5, 'metric': 'cosine', 'random_state': 42}, 'hdbscan_args': {'min_cluster_size': 15, 'metric': 'euclidean', 'cluster_selection_method': 'eom'}}",20news,20,True,19,678,"['disease' 'patient' 'doctor' 'physician' 'diagnosis' 'treatment'
291
+ 'infection' 'jxp' 'geb' 'vaginal' 'candida' 'vitamin' 'therapy' 'sinus'
292
+ 'diagnosed' 'medication' 'chronic' 'chastity' 'syndrome' 'symptom'
293
+ 'skepticism' 'yeast' 'pregnancy' 'tissue' 'clinical' 'medical' 'diet'
294
+ 'systemic' 'rind' 'intellect' 'shameful' 'bacteria' 'disorder' 'nerve'
295
+ 'antibiotic' 'lyme' 'gordon' 'surgery' 'medicine' 'illness' 'liver'
296
+ 'cure' 'pain' 'quack' 'banschbach' 'dietary' 'kidney' 'migraine' 'dose'
297
+ 'placebo']","[0.51703024 0.5059259 0.49075317 0.47384253 0.46913362 0.46782446
298
+ 0.45764387 0.4515015 0.44527012 0.43146837 0.42805362 0.4274508
299
+ 0.42565894 0.4239667 0.42124498 0.4210152 0.41872454 0.4175253
300
+ 0.4121551 0.41190648 0.4063744 0.40461332 0.4000011 0.39615753
301
+ 0.3931234 0.39151937 0.39004126 0.3890183 0.3871621 0.38580507
302
+ 0.38531238 0.38243055 0.38105106 0.38043314 0.37947708 0.37777084
303
+ 0.37690628 0.37274668 0.3680032 0.3603353 0.3598597 0.35893765
304
+ 0.35835528 0.35703662 0.34972584 0.3469156 0.34666422 0.3463875
305
+ 0.34193394 0.3419305 ]",236,20,1337.527
data/sample_text-old.csv ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ id_category,category,data
2
+ 0,alt.atheism,"
3
+
4
+ Ideologies also split, giving more to disagree upon, and may also lead
5
+ to intolerance. So do you also oppose all ideologies?
6
+
7
+ I don't think your argument is an argument against religion at all, but
8
+ just points out the weaknesses of human nature.
9
+
10
+
11
+ I would like a reference if you have got one, for this is news to me.
12
+
13
+
14
+ One must approach the Qur'an with intelligence. Any thinking approach
15
+ to the Qur'an cannot but interpret the above verse and others like it
16
+ that women and men are spiritual equals.
17
+
18
+ I think that the above verse does clearly imply that women have
19
+ souls. Does it make any sense for something without a soul to be
20
+ forgiven? Or to have a great reward (understood to be in the
21
+ after-life)? I think the usual answer would be no -- in which case, the
22
+ part saying ""For them has God prepared forgiveness and a great reward""
23
+ says they have souls.
24
+
25
+ (If it makes sense to say that things without souls can be forgiven, then
26
+ I have no idea _what_ a soul is.)
27
+
28
+ As for your saying that the quote above may not be given a high priority
29
+ in all interpretations, any thinking approach to the Qur'an has to give
30
+ all verses of the Qur'an equal priority. That is because, according to
31
+ Muslim belief, the _whole_ Qur'an is the revelation of God -- in fact,
32
+ denying the truth of any part of the Qur'an is sufficient to be
33
+ considered a disbeliever in Islam.
34
+
35
+
36
+ Look, any approach to the Qur'an must be done with intelligence and
37
+ thought. It is in this fashion that one can try to understand the
38
+ Quran's message. In a book of finite length, it cannot explicitly
39
+ answer every question you want to put to it, but through its teachings
40
+ it can guide you. I think, however, that women are the spiritual equals
41
+ of men is clearly and unambiguously implied in the above verse, and that
42
+ since women can clearly be ""forgiven"" and ""rewarded"" they _must_ have
43
+ souls (from the above verse).
44
+
45
+ Let's try to understand what the Qur'an is trying to teach, rather than
46
+ try to see how many ways it can be misinterpreted by ignoring this
47
+ passage or that passage. The misinterpretations of the Qur'an based on
48
+ ignoring this verse or that verse are infinite, but the interpretations
49
+ fully consistent are more limited. Let's try to discuss these
50
+ interpretations consistent with the text rather than how people can
51
+ ignore this bit or that bit, for that is just showing how people can try
52
+ to twist Islam for their own ends -- something I do not deny -- but
53
+ provides no reflection on the true teachings of Islam whatsoever."
54
+ 1,comp.graphics,"Does ANYONE out there in Net-land have any information on the Cobra 2.20
55
+ card? The sticker on the end of the card reads
56
+ Model: Cobra 1-B-1
57
+ Bios: Cobra v2.20
58
+
59
+ I Havn't been able to find anything about it from anyone! If you have
60
+ any information on how to get a hold of the company which produces the
61
+ card or know where any drivers are for it, PLEASE let me know!
62
+
63
+ As far as I can tell, it's a CGA card that is taking up 2 of my 16-bit
64
+ ISA slots but when I enable the test patterns, it displays much more than
65
+ the usualy 4 CGA colors... At least 16 from what I can count.. Thanks!
66
+
67
+ .------------------------------------------.
68
+ : Internet: jele@eis.calstate.edu :
69
+ : bbs.mirage@gilligan.tsoft.net :
70
+ : bbs.mirage@tsoft.sf-bay.org :
71
+ : mirage@thetech.com :
72
+ : UUCP : apple.com!tsoft!bbs.mirage :
73
+ `------------------------------------------'"
74
+ 2,comp.os.ms-windows.misc,"
75
+
76
+ Two things that annoyed me about the PC Magazine review:
77
+
78
+ 1. Their benchmarking technique is seriously flawed, as was clearly shown
79
+ by the Graphite and #9GXE's ""cheating"". Can't they just admit that
80
+ their benchmark is to easy to optimize for and/or cheat on? WinTach
81
+ is much harder to beat...
82
+
83
+ 2. The big ""cheat"" that Hercules/IIT used was overlappipping BitBlit and
84
+ PolyLine optimization, and Hercules sent them a new driver which didn't
85
+ do this -- but produced almost identical scores. Yet, the only thing
86
+ PC Magazine says is that scores are not ""legitimate"" -- couldn't they
87
+ at least make some comment about its real-world performance?
88
+
89
+ I'm not so much going by WinBench for performance, but, rather, by Steve
90
+ Gibson's results, which are more real-world based (like using Micrografx
91
+
92
+ ""Despite these documented cheats, I have to say that in real-world
93
+ applications the Hercules Graphite adapter actually draws faster than
94
+ any other display adapter in the industry. [...] Even though it's
95
+ just a modest ISA bus card, it outperformed every local bus adapter I
96
+ have, including the Stealth 24 VLB, the Fahrenheit 1280VA/VLB, the
97
+ ATI VLB Mach32 and even the Weitek P9000-based Diamond Viper VLB.""
98
+
99
+ This kind of praise is enough for me to be interested in it, bogus WinBench
100
+ or not! :)
101
+
102
+
103
+ I already returned the ATI GU+. My dealer had sold eight of them, and
104
+ seven were returned to him. I'm now (temporarily) back to running an
105
+ original ATI GU, until I get my Graphite. And, yes, the GU is faster in
106
+ my 16 MB system than the GU+, especially in bitmap handling (that's where
107
+ you use BitBlits).
108
+
109
+ Hercules has a promotion available for VARs and software developers: you
110
+ can buy one Graphite or Graphite VLB for 50% off list to try it out. At
111
+ just $199 for Graphite or $249 for the VLB version I decided it was worth
112
+ trying (I'm supposed to get the VLB board within two weeks). I'll post
113
+ my impressions when I get it...
114
+ "
115
+ 3,comp.sys.ibm.pc.hardware,"Hi. I am trying to set up a Conner 3184 and a Quantum 80AT drive. I have
116
+ the conner set to the master, and the quantum set to the slave (doesn't work
117
+ the other way around). I am able to access both drives if I boot from a
118
+ floppy, but the drives will not boot themselves. I am running MSDOS 6, and
119
+ have the Conner partitioned as Primary Dos, and is formatted with system
120
+ files. I have tried all different types of setups, and even changed IDE
121
+ controller cards. If I boot from a floppy, everything works great (except
122
+ the booting part :)). The system doesn't report an error message or anything,
123
+ just hangs there. Does anyone have any suggestions, or has somebody else
124
+ run into a similar problem? I was thinking that I might have to update the bios
125
+ on one of the drives (is this possible?). Any suggestions/answers would be
126
+ greatly appreciated. Please reply to:"
127
+ 4,comp.sys.mac.hardware," In certain Apple 13"" RGB monitors there has been a problem with
128
+ the HIGH VOLTAGE CAPASITOR. Apple knows about this problem and is
129
+ replacing the cap at no cost if it falls into the bad batch that
130
+ they got from their supplier. Your local repair shop should know about
131
+ REPAIR EXTENSION 3L0218.
132
+ "
133
+ 5,comp.windows.x,"
134
+ Have you gotten an answer yet? Using your variables, this is what I would do:
135
+ xterm -T ""$HOST - $LOGNAME"" -n ""$HOST""
136
+ "
137
+ 6,misc.forsale,"
138
+
139
+ COD is fine until the buyer opens the box to find they paid 150.00
140
+ for a brick. Or if it the seller allows for a personal check to be used
141
+ on a COD it's fine till a stop payment is made. There are few methods
142
+ to protect both buyer and seller in any sort of transaction. Even with
143
+ merchants and customers there are problems...stolen credit cards,
144
+ chargebacks, no return policies and getting the wrong item, etc.
145
+
146
+ About the only protection available to to do business with someone you
147
+ trust....someone who has been around for a while.
148
+
149
+ Jeff
150
+
151
+
152
+ ________________________________________________________________________"
153
+ 7,rec.autos,"From article <1993Apr5.200048.23421@ucsu.Colorado.EDU>, by lorenzo@rintintin.Colorado.EDU (Eric Lorenzo):
154
+
155
+ --Let me put it like this. The only similarity between the three models
156
+ is the ""300"", or 3-liter engine displacement. Actually, the SC300 (the
157
+ coupe) and the GS300 (the funky-looking new sedan) share the same 3.0
158
+ liter inline-six, and the ES300 (popular small sedan) uses 3.0 V6 shared
159
+ with the Camry. The SC300 is a luxury/sports coupe, the GS300 is the new
160
+ luxury sedan, and the ES300 is the base executive sedan. All three look
161
+ completely different.
162
+
163
+ --Aamir Qazi
164
+ -- "
165
+ 8,rec.motorcycles,"Now, I am jumping into the middle of this thread so I may not know
166
+ what y'all been talking about, but I have a few comments:
167
+
168
+
169
+ There are a number of other factors that are very important, the three
170
+ biggest being air velocity, air momentum and shock waves.
171
+ Velocity stacks have been used for years and are now being used inside
172
+ of stock airboxes on a number of bikes. At a tuned engine rpm, the
173
+ stacks can greatly increase the speed, and thus momentum of the air
174
+ rushing in.
175
+ Air momentum is critical in getting good air intake: the momentum of
176
+ the air stack outside the combustion chamber will force its way inside
177
+ long after the piston has begun its compressive up-stroke.
178
+ Shock waves are used to induce air intake and to prevent fresh air from
179
+ escaping out the exzhaust ports. Shock waves are the product of expansion
180
+ chambers or any other means of presenting a 'wall' (opening or closing)
181
+ to the air in motion. Beyond this I am lost in the mystery of how they
182
+ design for shock waves."
183
+ 9,rec.sport.baseball,"
184
+ Maybe it's just me, but the combination of those *young* faces peeking out
185
+ from under oversized aqua helmets screams ""Little League"" in every fibre of
186
+ my being...
187
+ "
188
+ 10,rec.sport.hockey,"........
189
+
190
+ It looks like the Edmonton Oilers just decided to take a European
191
+ vacation this spring...
192
+
193
+ Ranford, Tugnutt, Benning, Manson, Smith, Buchberger, and Corson
194
+ are playing for Canada.
195
+
196
+ Podein and Weight are playing for the US.
197
+
198
+ Is Kravchuk playing for the Russians...I know he had nagging
199
+ injuries late in the season.
200
+
201
+ Podein is an interesting case...because he was eligible to
202
+ play in Cape Breton in the AHL playoffs like Kovalev, Zubov,
203
+ and Andersson...obviously Sather and Pocklington are not
204
+ the total scrooges everyone makes them out to be...certainly
205
+ in this case they've massively outclassed Paramount and the
206
+ New York Rangers."
207
+ 11,sci.crypt,"Gee, I guess they should also have such a repository for house keys,
208
+ car keys, safety deposit keys, ... :-(
209
+
210
+ rdl"
211
+ 12,sci.electronics,"Doesn't Motorola AMCU have something on the BBS yet? (512-891-3733)
212
+ "
213
+ 13,sci.med,"[reply to keith@actrix.gen.nz (Keith Stewart)]
214
+
215
+
216
+ It would help if you (and anyone else asking for medical information on
217
+ some subject) could ask specific questions, as no one is likely to type
218
+ in a textbook chapter covering all aspects of the subject. If you are
219
+ looking for a comprehensive review, ask your local hospital librarian.
220
+ Most are happy to help with a request of this sort.
221
+
222
+ Briefly, this is a condition in which patients who have significant
223
+ residual weakness from childhood polio notice progression of the
224
+ weakness as they get older. One theory is that the remaining motor
225
+ neurons have to work harder and so die sooner."
226
+ 14,sci.space,"
227
+ Any lunar satellite needs fuel to do regular orbit corrections, and when
228
+ its fuel runs out it will crash within months. The orbits of the Apollo
229
+ motherships changed noticeably during lunar missions lasting only a few
230
+ days. It is *possible* that there are stable orbits here and there --
231
+ the Moon's gravitational field is poorly mapped -- but we know of none.
232
+
233
+ Perturbations from Sun and Earth are relatively minor issues at low
234
+ altitudes. The big problem is that the Moon's own gravitational field
235
+ is quite lumpy due to the irregular distribution of mass within the Moon."
236
+ 15,soc.religion.christian,"I wrote in response to dlecoint@garnet.acns.fsu.edu (Darius_Lecointe):
237
+
238
+
239
+ Was Paul a God too? Is an interpretation of the words of Paul of higher
240
+ priority than the direct word of Jesus in Matt5:14-19? Paul begins
241
+ Romans 14 with ""If someone is weak in the faith ..."" Do you count
242
+ yourself as one who is weak in the faith?
243
+
244
+
245
+ Yes, but what does the Bible have to say? What did Jesus say? Paul
246
+ closes Romans 14 with, ""On the other hand, the person with doubts about
247
+ something who eats it anyway is guilty, because he isn't acting on his
248
+ faith, and any failure to act on faith is a sin."" Gaus, ISBN:0-933999-99-2
249
+ Have you read the Ten Commandments which are a portion of the Law? Have
250
+ you read Jesus' word in Matt5:14-19? Is there any doubt in your mind
251
+ about what is right and what is sin (Greek hamartia = missing the mark)?
252
+
253
+
254
+ Whereas, the Ten Commandments and Jesus' words in Matt5:14-19 are fairly
255
+ clear, are they not?
256
+
257
+
258
+ Matt5:14-19 doesn't answer your question?
259
+
260
+
261
+ Breaking bread - roughly synonymous with eating.
262
+
263
+
264
+ How do you unite this concept of yours with the Ten Commandments and
265
+ Jesus's word in Matt5:14-19?
266
+
267
+
268
+ Or, they assumed that the Ten Commandments and Jesus' word in
269
+ Matt5:14-19 actually stood for something? Perhaps they were ""strong in
270
+ the faith?""
271
+
272
+ ---------------------------
273
+
274
+ [No, I don't believe that Paul can overrule God. However Paul was
275
+ writing for a largely Gentile audience. The Law was regarded by Jews
276
+ at the time (and now) as binding on Jews, but not on Gentiles. There
277
+ are rules that were binding on all human beings (the so-called Noachic
278
+ laws), but they are quite minimal. The issue that the Church had to
279
+ face after Jesus' death was what to do about Gentiles who wanted to
280
+ follow Christ. The decision not to impose the Law on them didn't say
281
+ that the Law was abolished. It simply acknowledged that fact that it
282
+ didn't apply to Gentiles. Thus there is no contradiction with Mat 5.
283
+ As far as I can tell, both Paul and other Jewish Christians did
284
+ continue to participate in Jewish worship on the Sabbath. Thus they
285
+ continued to obey the Law. The issue was (and is) with Gentile
286
+ Christians, who are not covered by the Law (or at least not by the
287
+ ceremonial aspects of it).
288
+
289
+ Jesus dealt mostly with Jews. I think we can reasonably assume that
290
+ Mat 5 was directed to a Jewish audience. He did interact with
291
+ Gentiles a few times (e.g. the centurion whose slave was healed and a
292
+ couple of others). The terms used to describe the centurion (see Luke
293
+ 7) suggest that he was a ""God-fearer"", i.e. a Gentile who followed
294
+ God, but had not adopted the whole Jewish Law. He was commended by
295
+ Jewish elders as a worthy person, and Jesus accepted him as such.
296
+ This seems to me to indicate that Jesus accepted the prevailing view
297
+ that Gentiles need not accept the Law.
298
+
299
+ However there's more involved if you want to compare Jesus and Paul on
300
+ the Law. In order to get a full picture of the role of the Law, we
301
+ have to come to grips with Paul's apparent rejection of the Law, and
302
+ how that relates to Jesus' commendation of the Law. At least as I
303
+ read Paul, he says that the Law serves a purpose that has been in a
304
+ certain sense superceded. Again, this issue isn't one of the
305
+ abolition of the Law. In the middle of his discussion, Paul notes
306
+ that he might be understood this way, and assures us that that's not
307
+ what he intends to say. Rather, he sees the Law as primarily being
308
+ present to convict people of their sinfulness. But ultimately it's an
309
+ impossible standard, and one that has been superceded by Christ.
310
+ Paul's comments are not the world's clearest here, and not everyone
311
+ agrees with my reading. But the interesting thing to notice is that
312
+ even this radical position does not entail an abolition of the Law.
313
+ It still remains as an uncompromising standard, from which not an iota
314
+ or dot may be removed. For its purpose of convicting of sin, it's
315
+ important that it not be relaxed. However for Christians, it's not
316
+ the end -- ultimately we live in faith, not Law.
317
+
318
+ While the theoretical categories they use are rather different, in the
319
+ end I think Jesus and Paul come to a rather similar conclusion. The
320
+ quoted passage from Mat 5 should be taken in the context of the rest
321
+ of the Sermon on the Mount, where Jesus shows us how he interprets the
322
+ Law. The ""not an iota or dot"" would suggest a rather literal reading,
323
+ but in fact that's not Jesus' approach. Jesus' interpretations
324
+ emphasize the intent of the Law, and stay away from the ceremonial
325
+ details. Indeed he is well known for taking a rather free attitude
326
+ towards the Sabbath and kosher laws. Some scholars claim that Mat
327
+ 5:17-20 needs to be taken in the context of 1st Cent. Jewish
328
+ discussions. Jesus accuses his opponents of caring about giving a
329
+ tenth of even the most minor herbs, but neglecting the things that
330
+ really matter: justice, mercy and faith, and caring about how cups and
331
+ plates are cleaned, but not about the fact that inside the people who
332
+ use them are full of extortion and rapacity. (Mat 23:23-25) This, and
333
+ the discussion later in Mat 5, suggest that Jesus has a very specific
334
+ view of the Law in mind, and that when he talks about maintaining the
335
+ Law in its full strength, he is thinking of these aspects of it.
336
+ Paul's conclusion is similar. While he talks about the Law being
337
+ superceded, all of the specific examples he gives involve the
338
+ ""ceremonial law"", such as circumcision and the Sabbath. He is quite
339
+ concerned about maintaining moral standards.
340
+
341
+ The net result of this is that when Paul talks about the Law being
342
+ superceded, and Jesus talks about the Law being maintained, I believe
343
+ they are talking about different aspects of the Law. Paul is
344
+ embroiled in arguments about circumcision. As is natural in letters
345
+ responding to specific situations, he's looking at the aspect of the
346
+ Law that is currently causing trouble: the Law as specifically Jewish
347
+ ceremonies. He certainly does not intend to abolish divine standards
348
+ of conduct. On the other hand, when Jesus commends the Law, he seems
349
+ to be talking the Law in its broadest implications for morals and
350
+ human relationships, and deemphasizing those aspects that were later
351
+ to give Paul so much trouble.
352
+
353
+ It's unfortunate that people use the same terms in different ways, but
354
+ we should be familiar with that from current conflicts. Look at the
355
+ way terms like ""family values"" take on special meaning from the
356
+ current context. Imagine some poor historian of the future trying to
357
+ figure out why ""family values"" should be used as a code word for
358
+ opposition to homosexuality in one specific period in the U.S. I
359
+ think Law had taken on a similar role in the arguments Paul was
360
+ involved in. Paul was clearly not rejecting all of the Jewish values
361
+ that go along with the term ""Law"", any more than people who concerned
362
+ about the ""family values"" movement are really opposed to family
363
+ values."
364
+ 16,talk.politics.guns,"
365
+ That's a revisionist account of what happened. Gritz was well-aware
366
+ of Duke's presence on the ticket. Given that Gritz is not at all shy
367
+ about associating and promoting other white supremacists (such as the
368
+ Christian Identity movement or Willis Carto), whatever reasons Gritz
369
+ had to leave the ticket had nothing to do with Duke's presence.
370
+
371
+
372
+ I believe Chip Berlet has a Populist Party newsletter from the time with
373
+ a photo of Gritz happily shaking hands with Duke."
374
+ 17,talk.politics.mideast,"
375
+ ^^^^^^^^^^^^^^^"
376
+ 18,talk.politics.misc,"
377
+ And the reason that the Soviet Union couldn't achieve the ideal of pure
378
+ communism was the hostility of surrounding capitalist nations...Uh huh.
379
+ Somehow, this all sounds familiar. Once again, utopian dreams are
380
+ confronted by the real world...
381
+ "
382
+ 19,talk.religion.misc,"
383
+
384
+ Paradise and salvation are not the same thing. Salvation is better. Refer
385
+ to John 14:2."
data/sample_text.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36c16fa77b26c7c6686dbdafccbe792b41b797d336af9e87f534133248a25ca7
3
+ size 12440444