Add BERTopic model

Browse files

Files changed (4) hide show

README.md +71 -0
config.json +14 -0
topic_embeddings.safetensors +3 -0
topics.json +231 -0

README.md ADDED Viewed

	@@ -0,0 +1,71 @@

+---
+tags:
+- bertopic
+library_name: bertopic
+pipeline_tag: text-classification
+---
+# string2-string
+This is a [BERTopic](https://github.com/MaartenGr/BERTopic) model.
+BERTopic is a flexible and modular topic modeling framework that allows for the generation of easily interpretable topics from large datasets.
+## Usage
+To use this model, please install BERTopic:
+```
+pip install -U bertopic
+```
+You can use the model as follows:
+```python
+from bertopic import BERTopic
+topic_model = BERTopic.load("syntag/string2-string")
+topic_model.get_topic_info()
+```
+## Topic overview
+* Number of topics: 4
+* Number of training documents: 20
+<details>
+  <summary>Click here for an overview of all topics.</summary>
+
+| Topic ID | Topic Keywords | Topic Frequency | Label |
+|----------|----------------|-----------------|-------|
+| 0 | life - make - adulting - worm - gives | 7 | 0_life_make_adulting_worm |
+| 1 | like - bar - walk - matter - coding | 7 | 1_like_bar_walk_matter |
+| 2 | break - version - vacation - told - succeed | 3 | 2_break_version_vacation_told |
+| 3 | don - skeletons - shame - scientists - parallel | 3 | 3_don_skeletons_shame_scientists |
+</details>
+## Training hyperparameters
+* calculate_probabilities: False
+* language: None
+* low_memory: False
+* min_topic_size: 10
+* n_gram_range: (1, 1)
+* nr_topics: None
+* seed_topic_list: None
+* top_n_words: 10
+* verbose: False
+## Framework versions
+* Numpy: 1.24.4
+* HDBSCAN: 0.8.33
+* UMAP: 0.5.4
+* Pandas: 2.0.3
+* Scikit-Learn: 1.3.1
+* Sentence-transformers: 2.2.2
+* Transformers: 4.34.1
+* Numba: 0.58.1
+* Plotly: 5.17.0
+* Python: 3.10.12

config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "calculate_probabilities": false,
+  "language": null,
+  "low_memory": false,
+  "min_topic_size": 10,
+  "n_gram_range": [
+    1,
+    1
+  ],
+  "nr_topics": null,
+  "seed_topic_list": null,
+  "top_n_words": 10,
+  "verbose": false
+}

topic_embeddings.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0092a325c627101220eb2a6189f8efc5488d21465b923a63fc2a4a548fad8629
+size 6232

topics.json ADDED Viewed

	@@ -0,0 +1,231 @@

+{
+  "topic_representations": {
+    "0": [
+      [
+        "life",
+        0.6021077318297084
+      ],
+      [
+        "make",
+        0.4916189043891051
+      ],
+      [
+        "adulting",
+        0.48692197124668946
+      ],
+      [
+        "worm",
+        0.48692197124668946
+      ],
+      [
+        "gives",
+        0.48692197124668946
+      ],
+      [
+        "dough",
+        0.48692197124668946
+      ],
+      [
+        "fox",
+        0.48692197124668946
+      ],
+      [
+        "jar",
+        0.48692197124668946
+      ],
+      [
+        "kneaded",
+        0.48692197124668946
+      ],
+      [
+        "dog",
+        0.48692197124668946
+      ]
+    ],
+    "1": [
+      [
+        "like",
+        0.5110698918837933
+      ],
+      [
+        "bar",
+        0.43669811543340903
+      ],
+      [
+        "walk",
+        0.43669811543340903
+      ],
+      [
+        "matter",
+        0.43669811543340903
+      ],
+      [
+        "coding",
+        0.43669811543340903
+      ],
+      [
+        "bring",
+        0.43669811543340903
+      ],
+      [
+        "bugs",
+        0.43669811543340903
+      ],
+      [
+        "javascript",
+        0.43669811543340903
+      ],
+      [
+        "world",
+        0.43669811543340903
+      ],
+      [
+        "way",
+        0.43669811543340903
+      ]
+    ],
+    "2": [
+      [
+        "break",
+        0.7404576804695774
+      ],
+      [
+        "version",
+        0.7404576804695774
+      ],
+      [
+        "vacation",
+        0.7404576804695774
+      ],
+      [
+        "told",
+        0.7404576804695774
+      ],
+      [
+        "succeed",
+        0.7404576804695774
+      ],
+      [
+        "stop",
+        0.7404576804695774
+      ],
+      [
+        "sending",
+        0.7404576804695774
+      ],
+      [
+        "saving",
+        0.7404576804695774
+      ],
+      [
+        "needed",
+        0.7404576804695774
+      ],
+      [
+        "mode",
+        0.7404576804695774
+      ]
+    ],
+    "3": [
+      [
+        "don",
+        0.8067972479797363
+      ],
+      [
+        "skeletons",
+        0.7404576804695774
+      ],
+      [
+        "shame",
+        0.7404576804695774
+      ],
+      [
+        "scientists",
+        0.7404576804695774
+      ],
+      [
+        "parallel",
+        0.7404576804695774
+      ],
+      [
+        "meet",
+        0.7404576804695774
+      ],
+      [
+        "trust",
+        0.7404576804695774
+      ],
+      [
+        "ll",
+        0.7404576804695774
+      ],
+      [
+        "lines",
+        0.7404576804695774
+      ],
+      [
+        "guts",
+        0.7404576804695774
+      ]
+    ]
+  },
+  "topics": [
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    0,
+    1,
+    0,
+    1,
+    2,
+    0,
+    0,
+    2,
+    2,
+    3,
+    1,
+    0,
+    3,
+    3
+  ],
+  "topic_sizes": {
+    "0": 7,
+    "1": 7,
+    "2": 3,
+    "3": 3
+  },
+  "topic_mapper": [
+    [
+      0,
+      0,
+      1
+    ],
+    [
+      1,
+      1,
+      3
+    ],
+    [
+      2,
+      2,
+      0
+    ],
+    [
+      3,
+      3,
+      2
+    ]
+  ],
+  "topic_labels": {
+    "0": "0_life_make_adulting_worm",
+    "1": "1_like_bar_walk_matter",
+    "2": "2_break_version_vacation_told",
+    "3": "3_don_skeletons_shame_scientists"
+  },
+  "custom_labels": null,
+  "_outliers": 0,
+  "topic_aspects": {}
+}