{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "844c876f", "metadata": {}, "outputs": [], "source": [ "from bertopic import BERTopic\n", "import pandas as pd \n", "import openai\n", "from bertopic.representation import OpenAI\n", "from umap import UMAP\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "560143ff", "metadata": {}, "outputs": [], "source": [ "# Input file, text column and length cutoff are named here so they can be tuned in one place.\n", "DATA_PATH = 'NikeTwitter.csv'\n", "TEXT_COLUMN = 'translated_text'\n", "MIN_DOC_CHARS = 30  # a document is kept only if it is strictly longer than this\n", "\n", "df = pd.read_csv(DATA_PATH)\n", "\n", "# Drop missing entries explicitly: str() of a missing value would otherwise yield the\n", "# literal text 'nan', which was only discarded by coincidence of the length filter below.\n", "raw_texts = df[TEXT_COLUMN].dropna().tolist()\n", "\n", "# Coerce every entry to str and replace each '#' with a single space.\n", "cleaned_texts = (str(text).replace(\"#\", \" \") for text in raw_texts)\n", "\n", "# Keep only the documents whose cleaned text exceeds the cutoff.\n", "docs = [text for text in cleaned_texts if len(text) > MIN_DOC_CHARS]" ] }, { "cell_type": "code", "execution_count": 3, "id": "a579594d", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f40e914627074089906a1d35d06f55b4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Batches: 0%| | 0/30 [00:00