{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "c3950dcb",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8b15018b05fd492c8b04c766b1a77705",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/30 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sentence_transformers import SentenceTransformer\n",
"from bertopic import BERTopic\n",
"from umap import UMAP\n",
"import pandas as pd\n",
"\n",
"# Load the tweets. Rows without translated text are dropped up front because\n",
"# the embedding model expects strings and a missing value would arrive as a\n",
"# float NaN. The index is reset so that df row i still corresponds to docs[i].\n",
"df = (\n",
"    pd.read_csv('NikeTwitter.csv')\n",
"    .dropna(subset=['translated_text'])\n",
"    .reset_index(drop=True)\n",
")\n",
"docs = df['translated_text'].astype(str).tolist()\n",
"\n",
"# Pre-compute the sentence embeddings once so they can be handed to BERTopic.\n",
"sentence_model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n",
"embeddings = sentence_model.encode(docs, show_progress_bar=True)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b83910f5",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f011832e",
"metadata": {},
"outputs": [],
"source": [
"# Train BERTopic on the pre-computed embeddings.\n",
"# UMAP is stochastic, so a fixed random_state is supplied to make the topics\n",
"# reproducible across Restart & Run All. The remaining arguments mirror the\n",
"# UMAP settings BERTopic uses by default, so only determinism changes.\n",
"umap_model = UMAP(\n",
"    n_neighbors=15,\n",
"    n_components=5,\n",
"    min_dist=0.0,\n",
"    metric='cosine',\n",
"    random_state=42,\n",
")\n",
"topic_model = BERTopic(umap_model=umap_model).fit(docs, embeddings)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "e93acf78",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"coloraxis": "coloraxis",
"hovertemplate": "x: %{x}
y: %{y}
Similarity Score: %{z}