{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"machine_shape": "hm"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"gpuClass": "standard"
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Install required Libraries"
],
"metadata": {
"id": "TrV0i1Vk3_cE"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "eNA2FS2VPSwI",
"outputId": "f453cf29-5b42-4497-9199-2c39dfefcfca"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
]
}
],
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
]
},
{
"cell_type": "code",
"source": [
"# Install tomotopy\n",
"! pip install tomotopy"
],
"metadata": {
"id": "alH-oKMa4EfV",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "1e67eab0-baab-4bcf-c571-bfb840e291a7"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting tomotopy\n",
" Downloading tomotopy-0.12.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (16.5 MB)\n",
"\u001b[K |████████████████████████████████| 16.5 MB 11.3 MB/s \n",
"\u001b[?25hRequirement already satisfied: numpy>=1.11.0 in /usr/local/lib/python3.7/dist-packages (from tomotopy) (1.21.6)\n",
"Installing collected packages: tomotopy\n",
"Successfully installed tomotopy-0.12.3\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# Imports and Setup"
],
"metadata": {
"id": "6GbZy4iAXEVe"
}
},
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import numpy as np\n",
"import os\n",
"import re\n",
"from IPython.core.interactiveshell import InteractiveShell\n",
"InteractiveShell.ast_node_interactivity = \"all\" # allow multiple outputs in a cell\n",
"\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"%matplotlib inline\n",
"\n",
"\n",
"# import tomotopy\n",
"import tomotopy as tp\n",
"import pickle"
],
"metadata": {
"id": "5tPVn0h1R7cD"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Loading NLTK Modules\n",
"import nltk\n",
"# nltk.download('all')\n",
"nltk.download('stopwords')\n",
"nltk.download('punkt')\n",
"nltk.download('wordnet')\n",
"nltk.download('omw-1.4')\n",
"nltk.download('averaged_perceptron_tagger')\n",
"from nltk.stem import PorterStemmer\n",
"from nltk.corpus import stopwords"
],
"metadata": {
"id": "jVzSV7KaoN8C",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "1d9eec7e-e4a0-4239-ba8c-fe2e4fa9ec38"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
"[nltk_data] Package stopwords is already up-to-date!\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 78
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 78
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package wordnet to /root/nltk_data...\n",
"[nltk_data] Package wordnet is already up-to-date!\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 78
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package omw-1.4 to /root/nltk_data...\n",
"[nltk_data] Package omw-1.4 is already up-to-date!\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 78
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package averaged_perceptron_tagger to\n",
"[nltk_data] /root/nltk_data...\n",
"[nltk_data] Package averaged_perceptron_tagger is already up-to-\n",
"[nltk_data] date!\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 78
}
]
},
{
"cell_type": "code",
"source": [
"class topicModel:\n",
"\n",
" # perform pre-processing steps using lemmatization, stop-words and unnecessary punctuation removal\n",
" def preprocess_article_text(self, doc_article):\n",
" \"\"\"\n",
" Accept pandas series, then:\n",
" 1. Apply Word stemming\n",
" 2. Apply Stop Word removal\n",
" \"\"\"\n",
" # clean\n",
" doc_article = doc_article.lower()\n",
" # remove stop words\n",
" words = nltk.word_tokenize(doc_article)\n",
" stop_words = stopwords.words('english')\n",
" stop_words = stop_words + [\"said\", \"says\", \"just\", \"like\", \"would\", \"could\", \"use\", \"told\", \"new\", \"also\", \"thats\", \"even\",\"dont\"]\n",
" words = [word for word in words if word not in stop_words and len(word) > 3]\n",
" doc_article = ' '.join(words)\n",
" doc_article = doc_article.replace('\\xa0', '')\n",
" doc_article = re.sub('[!\"#$%&\\'()’*+,-./:;<=>?—@[\\\\]^_`{|}~’]', '', doc_article)\n",
" # remove digits \n",
" doc_article = re.sub(\"^\\d+\\s|\\s\\d+\\s|\\s\\d+$\", \" \", doc_article)\n",
" return doc_article\n",
"\n",
" def LdaModel_train(self, doc_list):\n",
" # k_g is th number of global topics, while k_l is the number of local topics\n",
" num_doc = len(doc_list)\n",
" mdl = tp.LDAModel(k=5, min_cf= int(num_doc * 0.25), min_df= int(num_doc * 0.33))\n",
" for document in doc_list:\n",
" mdl.add_doc(document.split())\n",
"\n",
" iterations = 100\n",
" for i in range(0, 10000, iterations):\n",
" mdl.train(iterations)\n",
" print('Iteration: {}\\tLog-likelihood: {}'.format(i, mdl.ll_per_word))\n",
" result_dict_train = self.extract_topic(mdl)\n",
" return result_dict_train, mdl\n",
"\n",
" def extract_topic(self, mdl):\n",
" result_dict = {}\n",
" topic_dict = {}\n",
" extractor = tp.label.PMIExtractor(max_len=5, max_cand=10000)\n",
" cands = extractor.extract(mdl)\n",
"\n",
" # ranking the candidates of labels for a specific topic\n",
" labeler = tp.label.FoRelevance(mdl, cands, smoothing=1e-2, mu=0.25)\n",
"\n",
" # for k in range(mdl.k):\n",
" # print(\"== Topic #{} ==\".format(k))\n",
" # print(\"Labels:\", ', '.join(label for label, score in labeler.get_topic_labels(k, top_n=5)))\n",
" # for word, prob in mdl.get_topic_words(k, top_n=10):\n",
" # print(word, prob, sep='\\t')\n",
"\n",
" max_topic_num = 0\n",
" for k in range(mdl.k):\n",
" cur_topic = \"topic#\"+str(k)\n",
" result_dict[cur_topic] = {}\n",
" result_dict[cur_topic][\"labels\"] = (', '.join(label for label, score in labeler.get_topic_labels(k, top_n=5)))\n",
" # result_dict[cur_topic]['topics'] = mdl.get_topic_words(k, top_n=10)\n",
" result_dict[cur_topic]['topics'] = ' ,'.join([i[0] for i in mdl.get_topic_words(k, top_n=5)])\n",
" \n",
" return result_dict\n",
"\n",
" def LdaModel_predict(self, doc_list, mdl):\n",
" pred_result = {}\n",
" docs_words = []\n",
" for doc in doc_list:\n",
" docs_words = docs_words + doc.strip().split()\n",
" doc_inst = mdl.make_doc(docs_words)\n",
" topic_dist, ll = mdl.infer(doc_inst)\n",
" # sort the topic dist and take index\n",
" topic_dist_arr = np.array(topic_dist)\n",
" topic_dist_idx = topic_dist_arr.argsort()[::-1]\n",
" mdl_topic = self.extract_topic(mdl)\n",
" idx = 0\n",
" for i in topic_dist_idx:\n",
" if topic_dist[i]>0:\n",
" pred_result[\"topic#\"+str(idx)] = mdl_topic[\"topic#\"+str(i)]\n",
" idx+=1\n",
" return pred_result\n",
"\n"
],
"metadata": {
"id": "vnab3ToAR7o2"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"data_path = '/content/drive/MyDrive/GLG_project/data/GLG_train_data_labeled.csv'\n",
"df_train = pd.read_csv(data_path, sep=',')"
],
"metadata": {
"id": "0yfvTUhdMMZv"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"hierarchical_data_path = '/content/drive/MyDrive/GLG_project/data/hierarchial_cluster.csv'\n",
"df_hierarchical = pd.read_csv(hierarchical_data_path, sep=',')"
],
"metadata": {
"id": "qMdJT7BDMvLS"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Create a topicModel instance to call its preprocessing and modelling methods\n",
"topic_object = topicModel()"
],
"metadata": {
"id": "pLg7nXlrOJQW"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Preprocess training data\n",
"df_train['preprocessed_article'] = df_train['article'].apply(topic_object.preprocess_article_text)"
],
"metadata": {
"id": "qUXB3_o2qTc-"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df_train.head(2)"
],
"metadata": {
"id": "gQ8EO4iWsKKM",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 315
},
"outputId": "494cf943-31c3-4874-f2d8-7287589cb9e0"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" date year month day \\\n",
"0 2018-08-09 09:11:14 2018 8.0 9 \n",
"1 2016-04-26 00:00:00 2016 4.0 26 \n",
"\n",
" title \\\n",
"0 Psychologists’ Group Maintains Ban on Work at ... \n",
"1 Prince autopsy: What examiners looked for \n",
"\n",
" article \\\n",
"0 MIND Members of the American Psychological Ass... \n",
"1 (CNN)Pop superstar Prince died from an accide... \n",
"\n",
" url section \\\n",
"0 https://www.nytimes.com/2018/08/09/health/inte... health \n",
"1 https://www.cnn.com/2016/04/26/health/prince-d... health \n",
"\n",
" publication tech_health_tag article_word_len cluster_label \\\n",
"0 The New York Times health 700 22 \n",
"1 CNN health 889 9 \n",
"\n",
" preprocessed_article \n",
"0 mind members american psychological associatio... \n",
"1 superstar prince died accidental overdose opio... "
],
"text/html": [
"\n",
"
\n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" year | \n",
" month | \n",
" day | \n",
" title | \n",
" article | \n",
" url | \n",
" section | \n",
" publication | \n",
" tech_health_tag | \n",
" article_word_len | \n",
" cluster_label | \n",
" preprocessed_article | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2018-08-09 09:11:14 | \n",
" 2018 | \n",
" 8.0 | \n",
" 9 | \n",
" Psychologists’ Group Maintains Ban on Work at ... | \n",
" MIND Members of the American Psychological Ass... | \n",
" https://www.nytimes.com/2018/08/09/health/inte... | \n",
" health | \n",
" The New York Times | \n",
" health | \n",
" 700 | \n",
" 22 | \n",
" mind members american psychological associatio... | \n",
"
\n",
" \n",
" 1 | \n",
" 2016-04-26 00:00:00 | \n",
" 2016 | \n",
" 4.0 | \n",
" 26 | \n",
" Prince autopsy: What examiners looked for | \n",
" (CNN)Pop superstar Prince died from an accide... | \n",
" https://www.cnn.com/2016/04/26/health/prince-d... | \n",
" health | \n",
" CNN | \n",
" health | \n",
" 889 | \n",
" 9 | \n",
" superstar prince died accidental overdose opio... | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 84
}
]
},
{
"cell_type": "code",
"source": [
"df_hierarchical[df_hierarchical['parent']==9909]"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 144
},
"id": "CsWshaLTiRxd",
"outputId": "b4528d0a-96d9-4598-f685-48498cb4dbcd"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" parent child lambda_val child_size cluster_label\n",
"954 9909 1088 3.316230 1 -1\n",
"959 9909 9913 3.333467 575 P\n",
"960 9909 9914 3.333467 381 P"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" parent | \n",
" child | \n",
" lambda_val | \n",
" child_size | \n",
" cluster_label | \n",
"
\n",
" \n",
" \n",
" \n",
" 954 | \n",
" 9909 | \n",
" 1088 | \n",
" 3.316230 | \n",
" 1 | \n",
" -1 | \n",
"
\n",
" \n",
" 959 | \n",
" 9909 | \n",
" 9913 | \n",
" 3.333467 | \n",
" 575 | \n",
" P | \n",
"
\n",
" \n",
" 960 | \n",
" 9909 | \n",
" 9914 | \n",
" 3.333467 | \n",
" 381 | \n",
" P | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 97
}
]
},
{
"cell_type": "code",
"source": [
"# Run Multi Grain LDA Model for training data\n",
"model_result_train = {\"global\": {}, \"local\":{}}\n",
"\n",
"# Global clusters\n",
"cluster_labels = [str(i) for i in df_train['cluster_label'].unique()]\n",
"for cluster_label in cluster_labels:\n",
" df_hierarchical_ = df_hierarchical[df_hierarchical['cluster_label']==cluster_label]\n",
" print('Starting training model {}'.format(cluster_label))\n",
" parent_docs = df_hierarchical_['parent'].unique()\n",
" print(parent_docs)\n",
" if len(parent_docs) > 1:\n",
" parent_docs = sorted(parent_docs)\n",
" global_p = parent_docs[:1]\n",
" global_docs_indx = df_hierarchical_[df_hierarchical_['parent'].isin(global_p)]['child']\n",
" global_docs = df_train.iloc[global_docs_indx]['preprocessed_article'].tolist()\n",
" local_p = parent_docs[1:]\n",
" else:\n",
" global_p = df_hierarchical[df_hierarchical['child']== parent_docs[0]]['parent'].tolist()\n",
" global_docs_indx = df_hierarchical[(df_hierarchical['parent'].isin(global_p)) & (df_hierarchical['cluster_label']!=\"P\")]['child']\n",
" global_docs = df_train.iloc[global_docs_indx]['preprocessed_article'].tolist()\n",
" local_p = parent_docs\n",
"\n",
" local_docs_indx = df_hierarchical_[df_hierarchical_['parent'].isin(local_p)]['child']\n",
" local_docs = df_train.iloc[local_docs_indx]['preprocessed_article'].tolist()\n",
"\n",
" model_result_train['global'][cluster_label], mdl_g = topic_object.LdaModel_train(global_docs)\n",
" # save the model file\n",
" mdl_g.save('/content/drive/MyDrive/GLG_project/GLG_topic_model/mdl_topic_model_global_' + str(cluster_label) + '.bin')\n",
"\n",
" model_result_train['local'][cluster_label], mdl_l = topic_object.LdaModel_train(local_docs)\n",
" # save the model file\n",
" mdl_l.save('/content/drive/MyDrive/GLG_project/GLG_topic_model/mdl_topic_model_local_' + str(cluster_label) + '.bin')\n"
],
"metadata": {
"id": "ZyP2D_WzsKaN"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import json\n",
"print(json.dumps(model_result_train['global'], sort_keys=True, indent=4))\n"
],
"metadata": {
"id": "69JC680doyY_",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "2cb4d7de-1fc4-4249-9890-a85f010581c7"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"-1\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"contact, symptoms, organization, boars, wild boars\",\n",
" \"topics\": \"virus ,spread ,health ,world ,reporting\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"boars, wild boars, african swine fever, african swine, swine fever\",\n",
" \"topics\": \"outbreak ,ministry ,disease ,agriculture ,reuters\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"total compensation million versus million, total compensation, versus, million versus, versus million\",\n",
" \"topics\": \"million ,reuters ,reporting ,beijing ,last\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"cull, usda, h5n8, flocks, strains\",\n",
" \"topics\": \"poultry ,bird ,birds ,highly ,china\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"monitoring, china reported, agriculture rural, beijing reuters china, ministry agriculture rural\",\n",
" \"topics\": \"china ,reported ,beijing ,editing ,reporting\"\n",
" }\n",
" },\n",
" \"0\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"cull, poultry markets, live poultry, flocks, usda\",\n",
" \"topics\": \"poultry ,bird ,birds ,reuters ,outbreak\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"contact, symptoms, organization, wild boars, boars\",\n",
" \"topics\": \"virus ,spread ,health ,world ,reporting\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"china reported, paris, sybille, hamaide, sybille hamaide\",\n",
" \"topics\": \"outbreak ,reported ,farm ,highly ,killed\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"filing, total compensation million, total compensation, million versus, versus million\",\n",
" \"topics\": \"million ,world ,reuters ,last ,reporting\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"herd, african swine, swine fever, african swine fever, agriculture rural\",\n",
" \"topics\": \"china ,ministry ,disease ,agriculture ,reuters\"\n",
" }\n",
" },\n",
" \"1\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"bain, nuclear, consortium, toshiba corp, chip unit\",\n",
" \"topics\": \"reuters ,reporting ,editing ,march ,last\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"cure, south sudan, immune system, guinea, humans\",\n",
" \"topics\": \"virus ,world ,first ,people ,reuters\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"south sudan, without borders, doctors without borders, doctors without, probable\",\n",
" \"topics\": \"outbreak ,health ,people ,world ,virus\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"health insurance, kaiser, insurance coverage, cancers, diagnoses\",\n",
" \"topics\": \"health ,first ,statement ,people ,reuters\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"stigma, insurance coverage, diagnoses, kaiser, preexposure\",\n",
" \"topics\": \"people ,health ,year ,virus ,last\"\n",
" }\n",
" },\n",
" \"10\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"autoinjector, epipens, epinephrine, patients, drug\",\n",
" \"topics\": \"generic ,drug ,patients ,epinephrine ,products\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"epipens, epinephrine, autoinjector, emergency, pfizer\",\n",
" \"topics\": \"epipens ,shortage ,supply ,lifesaving ,used\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"emergency, pfizer, product, epipens, generic\",\n",
" \"topics\": \"pfizer ,emergency ,united ,states ,medical\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"us, autoinjector, health, patients, price\",\n",
" \"topics\": \"mylan ,epipen ,company ,price ,product\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"price, health, us, treatment, allergy\",\n",
" \"topics\": \"us ,grassley ,federal ,department ,reuters\"\n",
" }\n",
" },\n",
" \"11\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"autoinjector, epinephrine, patients, us, epipens\",\n",
" \"topics\": \"epipen ,emergency ,lifesaving ,last ,device\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"pfizer, epipens, emergency, epinephrine, autoinjector\",\n",
" \"topics\": \"epipens ,shortage ,supply ,pfizer ,available\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"autoinjector, epipens, epinephrine, patients, drug\",\n",
" \"topics\": \"generic ,drug ,patients ,epinephrine ,products\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"health, price, us, allergy, generic\",\n",
" \"topics\": \"mylan ,price ,us ,health ,grassley\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"pfizer, emergency, product, price, allergy\",\n",
" \"topics\": \"company ,product ,allergy ,states ,united\"\n",
" }\n",
" },\n",
" \"12\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"risk, medicine, doctor, atrial fibrillation, fibrillation\",\n",
" \"topics\": \"health ,people ,data ,device ,used\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"heart rate, atrial fibrillation, fibrillation, atrial, heart\",\n",
" \"topics\": \"heart ,rate ,devices ,fitness ,used\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"watch series, apple watch series, samsung, series, apple watch\",\n",
" \"topics\": \"watch ,apple ,life ,used ,fitbit\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"started, popular, industry, silicon, developing\",\n",
" \"topics\": \"company ,year ,fitness ,first ,time\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"versa, healthcare, fitbit, sales, started\",\n",
" \"topics\": \"fitbit ,users ,fitness ,used ,according\"\n",
" }\n",
" },\n",
" \"13\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"watch series, apple watch series, image, series, apple watch\",\n",
" \"topics\": \"watch ,apple ,time ,used ,fitbit\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"measures, person, risk, medicine, doctor\",\n",
" \"topics\": \"health ,people ,data ,devices ,device\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"started, silicon, industry, offering, developing\",\n",
" \"topics\": \"company ,year ,first ,fitness ,last\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"heart rate, atrial fibrillation, fibrillation, heart, atrial\",\n",
" \"topics\": \"heart ,rate ,first ,fitness ,used\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"versa, healthcare, fitbit, started, goals\",\n",
" \"topics\": \"fitbit ,company ,fitness ,users ,used\"\n",
" }\n",
" },\n",
" \"14\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"takeaway, food delivery, food, delivery, competition\",\n",
" \"topics\": \"food ,delivery ,right ,meal ,whether\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"takeaway, shares, competition, share, first\",\n",
" \"topics\": \"takeaway ,shares ,competition ,amazon ,sales\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"right, last, year, percent, customers\",\n",
" \"topics\": \"apron ,blue ,service ,time ,according\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"share, company, shares, billion, last\",\n",
" \"topics\": \"company ,market ,percent ,share ,last\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"customers, million, billion, share, first\",\n",
" \"topics\": \"million ,year ,billion ,customers ,business\"\n",
" }\n",
" },\n",
" \"15\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"deal ,group ,percent ,products ,payment\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"medianet ,company ,technology ,united ,microsoft\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"turakhia ,based ,states ,digital ,msfto\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"chinese ,advertising ,miteno ,users ,amount\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"tech ,million ,reporting ,revenue ,interview\"\n",
" }\n",
" },\n",
" \"16\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"company ,facebook ,compete ,watch ,important\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"content ,people ,want ,things ,spend\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"mayer ,world ,team ,chief ,delivered\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"yahoo ,mobile ,million ,khalaf ,revenue\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"free ,access ,slim ,firm ,offer\"\n",
" }\n",
" },\n",
" \"17\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"ibes, cents share, third quarter, analysts average, cents\",\n",
" \"topics\": \"year ,last ,company ,billion ,people\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"selfdriving, technology companies, third quarter, taxes, tech industry\",\n",
" \"topics\": \"company ,companies ,technology ,last ,make\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"obsessed, tech product, carry, editor, print\",\n",
" \"topics\": \"time ,people ,make ,first ,technology\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"free android, interface, headphones, carry, obsessed\",\n",
" \"topics\": \"people ,make ,companies ,company ,year\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"ibes, analysts average, cents share, adjusted, cents\",\n",
" \"topics\": \"billion ,company ,year ,first ,last\"\n",
" }\n",
" },\n",
" \"18\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"match, lets, profiles, studies, influencers\",\n",
" \"topics\": \"people ,time ,make ,social ,media\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"fees, influencers, digital media, pelosi, media companies\",\n",
" \"topics\": \"content ,media ,companies ,online ,last\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"lets, donate, european, influencers, birthday\",\n",
" \"topics\": \"facebook ,social ,people ,companies ,company\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"profiles, prior, earnings report, daily active, daily active users\",\n",
" \"topics\": \"users ,company ,year ,last ,platform\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"twitter accounts, tweeting, phone number, twitter account, jack dorsey\",\n",
" \"topics\": \"twitter ,social ,including ,facebook ,many\"\n",
" }\n",
" },\n",
" \"19\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"credit, consumers, companies, banks, financial\",\n",
" \"topics\": \"banks ,financial ,consumers ,statement ,credit\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"market, tuesday, twitter, last, company\",\n",
" \"topics\": \"company ,last ,reuters ,reporting ,twitter\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"market, banking, fintech, britain, access\",\n",
" \"topics\": \"fintech ,banking ,services ,britain ,since\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"information, access, data, companies, consumers\",\n",
" \"topics\": \"data ,information ,companies ,access ,according\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"added, bank, customer, digital, online\",\n",
" \"topics\": \"customers ,bank ,customer ,online ,issue\"\n",
" }\n",
" },\n",
" \"2\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"promote, providing, education, obamacare, programs\",\n",
" \"topics\": \"health ,reproductive ,care ,services ,trump\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"medication, introduced, tests, taken, home\",\n",
" \"topics\": \"women ,abortion ,abortions ,states ,clinics\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"abortion restrictions, remains, texas, attorney general, legislature\",\n",
" \"topics\": \"abortion ,state ,rights ,court ,supreme\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"medication, fetus, introduced, tests, sign\",\n",
" \"topics\": \"bill ,pregnancy ,abortions ,parenthood ,since\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"providing, editing, reporting, reuters, abortion restrictions\",\n",
" \"topics\": \"federal ,planned ,us ,parenthood ,legal\"\n",
" }\n",
" },\n",
" \"20\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"footage, link, connected, imsi, first time\",\n",
" \"topics\": \"security ,company ,year ,time ,used\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"tech companies, personal data, industries, campaign, processors\",\n",
" \"topics\": \"data ,companies ,company ,information ,year\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"recognition technology, facial recognition technology, rekognition, recognition software, facial recognition software\",\n",
" \"topics\": \"technology ,people ,used ,make ,companies\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"robocall, robocalls, caller, spam, phone number\",\n",
" \"topics\": \"phone ,companies ,using ,used ,year\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"dozens, events, ring, footage, staff\",\n",
" \"topics\": \"information ,people ,year ,without ,first\"\n",
" }\n",
" },\n",
" \"21\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"childhood, severely obese, risks, develop, height\",\n",
" \"topics\": \"people ,percent ,health ,years ,data\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"greater, protein, kind, meal, calorie\",\n",
" \"topics\": \"food ,less ,found ,diet ,eating\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"morning, pick, kind, specific, workouts\",\n",
" \"topics\": \"body ,people ,exercise ,much ,time\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"involved study, male, exposure, early, reuters health\",\n",
" \"topics\": \"study ,researchers ,risk ,health ,research\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"weight gain, mass index, body mass index, body mass, index\",\n",
" \"topics\": \"weight ,obesity ,obese ,women ,study\"\n",
" }\n",
" },\n",
" \"22\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"percent higher, jones, previous studies, study researchers examined, researchers examined data\",\n",
" \"topics\": \"study ,researchers ,likely ,research ,found\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"medical students, trainees, medical school, medicine mount, medicine mount sinai\",\n",
" \"topics\": \"school ,medical ,medicine ,lead ,university\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"humor, fever, sexual behavior, ride, sensation\",\n",
" \"topics\": \"people ,time ,many ,years ,first\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"elevated, percent higher, soil, smoking, increase risk\",\n",
" \"topics\": \"health ,risk ,medical ,found ,including\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"human cells, injected, pigs, geneediting, ethical\",\n",
" \"topics\": \"research ,work ,university ,used ,medicine\"\n",
" }\n",
" },\n",
" \"3\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"cusack ,stop ,delivery ,help ,simply\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"people ,team ,regular ,lansing ,michigan\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"customers ,lake ,snowmobile ,want ,delivering\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"prescriptions ,posted ,pick ,enlisted ,grand\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"odessa ,snow ,pharmacy ,danger ,able\"\n",
" }\n",
" },\n",
" \"4\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"\"\n",
" }\n",
" },\n",
" \"5\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"side effects, statins, participants, effect, women\",\n",
" \"topics\": \"drug ,drugs ,used ,many ,medicine\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"participants, attack stroke, heart attack, attack, heart attack stroke\",\n",
" \"topics\": \"study ,patients ,risk ,heart ,taking\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"parents, fever, common, acetaminophen, child\",\n",
" \"topics\": \"according ,doctors ,symptoms ,research ,doctor\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"hospitals, india, private, system, billion\",\n",
" \"topics\": \"health ,treatment ,year ,percent ,increase\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"heart attack stroke, attack stroke, heart attack, attack, effect\",\n",
" \"topics\": \"disease ,people ,years ,time ,found\"\n",
" }\n",
" },\n",
" \"6\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"musk ,tunnels ,challenges ,construction ,many\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"boring ,potential ,depth ,likely ,digging\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"tunnels ,deep ,company ,tunnel ,university\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"urban ,civil ,professor ,city ,state\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"feet ,engineering ,evans ,weakened ,layers\"\n",
" }\n",
" },\n",
" \"7\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"engineering ,civil ,need ,geotechnical ,state\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"company ,tunnel ,challenges ,mason ,future\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"tunnels ,construction ,university ,professor ,impossible\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"musk ,feet ,time ,city ,building\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"\",\n",
" \"topics\": \"deep ,boring ,angeles ,engineers ,evans\"\n",
" }\n",
" },\n",
" \"8\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"internet, united, growth, companies, order\",\n",
" \"topics\": \"companies ,people ,market ,another ,technology\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"food delivery, asia, region, southeast asia, ridehailing\",\n",
" \"topics\": \"million ,services ,billion ,across ,another\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"editing, reuters, launch, firms, court\",\n",
" \"topics\": \"reuters ,technology ,reporting ,editing ,across\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"name, statement, food delivery, indonesia, largest\",\n",
" \"topics\": \"company ,billion ,business ,technology ,another\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"take, products, chinese, mobile, information\",\n",
" \"topics\": \"year ,last ,payments ,time ,first\"\n",
" }\n",
" },\n",
" \"9\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"latestage, bengaluru editing, bengaluru, shares, lilly\",\n",
" \"topics\": \"drug ,patients ,percent ,company ,disease\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"problem, since, cancer, impact, pressure\",\n",
" \"topics\": \"health ,year ,according ,time ,million\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"risk developing, author, participants, neurology, risk alzheimer\",\n",
" \"topics\": \"study ,brain ,research ,researchers ,found\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"beta amyloid, alzheimer drug, beta, immune, lilly\",\n",
" \"topics\": \"alzheimer ,disease ,drugs ,brain ,research\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"diet, exercise, training, neurology, risk factors\",\n",
" \"topics\": \"dementia ,people ,risk ,cognitive ,university\"\n",
" }\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import json\n",
"\n",
"# Pretty-print the 'local' entry of model_result_train as indented JSON.\n",
"# sort_keys orders the keys as strings, so \"10\" is listed before \"2\".\n",
"print(\n",
"    json.dumps(\n",
"        model_result_train['local'],\n",
"        indent=4,\n",
"        sort_keys=True,\n",
"    )\n",
")\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1smkqLUV7MZP",
"outputId": "1c37fea3-d54c-467b-ae15-70b2936e8a98"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"-1\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"science times, attendance, mental illness, taught, phrase\",\n",
" \"topics\": \"work ,time ,first ,make ,years\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"dominant, nasa, operating system, tech industry, search engine\",\n",
" \"topics\": \"year ,last ,according ,time ,still\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"short videos, douyin, merger, gopro, passwords\",\n",
" \"topics\": \"company ,last ,year ,according ,people\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"percent less likely, researchers examined data, online january, mercury, micrograms\",\n",
" \"topics\": \"health ,found ,according ,people ,company\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"study period, otolaryngology, geriatrician, geriatric, people take\",\n",
" \"topics\": \"people ,found ,many ,years ,make\"\n",
" }\n",
" },\n",
" \"0\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"rubella, scans, born microcephaly, congenital, disorder\",\n",
" \"topics\": \"zika ,virus ,microcephaly ,brazil ,outbreak\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"females, method, eggs, spray, insects\",\n",
" \"topics\": \"mosquitoes ,mosquito ,people ,control ,spread\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"control district, mosquito control district, miami beach, beach, miamidade county\",\n",
" \"topics\": \"zika ,virus ,florida ,states ,officials\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"brazil confirmed, suspected cases, spread rapidly, spread rapidly americas, considers\",\n",
" \"topics\": \"health ,cases ,us ,reuters ,case\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"cdc, countries territories, territories, fetus, hearing\",\n",
" \"topics\": \"women ,pregnant ,infected ,birth ,infection\"\n",
" }\n",
" },\n",
" \"1\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"bengaluru editing, euros reporting, reuters south, jane, jason neely\",\n",
" \"topics\": \"reuters ,reporting ,editing ,company ,coronavirus\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"eikon company coverage gdansk newsroom, text eikon company coverage gdansk, eikon company coverage gdansk, coverage gdansk newsroom, company coverage gdansk\",\n",
" \"topics\": \"company ,march ,reuters ,source ,text\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"year financial results, full year financial, quarter full year financial results, fourth quarter full year financial, quarter full year financial\",\n",
" \"topics\": \"company ,reuters ,source ,coverage ,text\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"alex richardson, richardson, editing alex, editing alex richardson, state news\",\n",
" \"topics\": \"coronavirus ,reuters ,reporting ,editing ,march\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"business update, year financial results, full year financial, quarter full year financial, full year financial results\",\n",
" \"topics\": \"company ,reuters ,source ,coverage ,text\"\n",
" }\n",
" },\n",
" \"10\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"individual plans, special enrollment, periods, jan, csrs\",\n",
" \"topics\": \"obamacare ,plans ,insurers ,percent ,individual\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"medicare advantage, private health, network, sanders, centers medicare medicaid services\",\n",
" \"topics\": \"medicare ,healthcare ,health ,care ,government\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"graham, house bill, majority leader, senate bill, cassidy\",\n",
" \"topics\": \"bill ,senate ,house ,repeal ,republicans\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"urban, household income, household, type, adult\",\n",
" \"topics\": \"health ,insurance ,people ,coverage ,plan\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"conference, issued, immediate, meeting, press\",\n",
" \"topics\": \"trump ,obamacare ,president ,administration ,congress\"\n",
" }\n",
" },\n",
" \"11\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"spinraza, muscular, avexis, physician, zolgensma\",\n",
" \"topics\": \"patients ,treatment ,drug ,cancer ,drugs\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"glyphosate, personalized, firstline treatment, cancer institute, cancer center\",\n",
" \"topics\": \"cancer ,patients ,treatment ,year ,drug\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"warning letter, shareholders, biogen, californiabased, us district\",\n",
" \"topics\": \"company ,us ,reuters ,drug ,reporting\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"democrats, lobbying, lowering, negotiate drug, campaign\",\n",
" \"topics\": \"drug ,drugs ,year ,administration ,patients\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"democrats, harvoni, lobbying, lowering, high drug\",\n",
" \"topics\": \"drugs ,drug ,patients ,reporting ,cancer\"\n",
" }\n",
" },\n",
" \"12\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"incredibly, class, simulation, letter, editor\",\n",
" \"topics\": \"game ,world ,time ,games ,company\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"simulation, editor, gameplay, weapons, publishers\",\n",
" \"topics\": \"games ,game ,video ,last ,company\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"incredibly, editor, night, living, literally\",\n",
" \"topics\": \"people ,first ,time ,still ,around\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"cloud gaming, game streaming, rivals, gaming market, million units\",\n",
" \"topics\": \"gaming ,games ,year ,last ,game\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"magic leap, investment, standalone, ship, snap\",\n",
" \"topics\": \"company ,year ,game ,games ,people\"\n",
" }\n",
" },\n",
" \"13\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"amazoncom, echo devices, edition, prime, fire stick\",\n",
" \"topics\": \"amazon ,devices ,google ,alexa ,home\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"google translate, translated, sentences, pixel, restaurant\",\n",
" \"topics\": \"google ,available ,make ,amazon ,alexa\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"musicians, vinyl, translated, kickstarter, noisecanceling\",\n",
" \"topics\": \"company ,first ,make ,time ,devices\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"alexa voice, amazon announced, echo devices, assistant alexa, wake word\",\n",
" \"topics\": \"alexa ,amazon ,voice ,home ,google\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"movies, home security, google home, door, lock\",\n",
" \"topics\": \"home ,google ,devices ,amazon ,alexa\"\n",
" }\n",
" },\n",
" \"14\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"theory, ethereum, hash, quantum computer, calculations\",\n",
" \"topics\": \"computer ,used ,time ,research ,according\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"colleges, employee, entrepreneurs, hired, cloud computing\",\n",
" \"topics\": \"company ,companies ,technology ,year ,work\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"privacy, cloud computing, bias, tencent, employee\",\n",
" \"topics\": \"intelligence ,artificial ,data ,human ,technology\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"mckinsey, warehouse, replaced, prototype, robots\",\n",
" \"topics\": \"robots ,human ,need ,years ,still\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"effect, computations, quantum computer, list, employee\",\n",
" \"topics\": \"people ,make ,world ,years ,time\"\n",
" }\n",
" },\n",
" \"15\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"visit, sanders, senator, political, york business\",\n",
" \"topics\": \"amazon ,time ,according ,companies ,including\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"coronavirus, deliver packages, brazilian, grocery delivery, delivery service\",\n",
" \"topics\": \"delivery ,customers ,companies ,including ,first\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"sanders, sale, percent year, senator, closing\",\n",
" \"topics\": \"percent ,year ,market ,time ,first\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"political, york business, finance, advertising business, deliver packages\",\n",
" \"topics\": \"company ,business ,companies ,people ,years\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"sale, ebay, buyers, venture, visit\",\n",
" \"topics\": \"billion ,year ,million ,online ,ecommerce\"\n",
" }\n",
" },\n",
" \"16\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"nadella, jumped percent, percent year, patents, dropped\",\n",
" \"topics\": \"percent ,market ,shares ,according ,last\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"million pounds, aviv reuters, tova, tova cohen, cohen\",\n",
" \"topics\": \"million ,reuters ,reporting ,editing ,financial\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"billion valuation, adam neumann, adam, neumann, saudi\",\n",
" \"topics\": \"billion ,company ,year ,last ,shares\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"netflix, saudi, paperwork, buybacks, generation\",\n",
" \"topics\": \"companies ,tech ,investors ,capital ,year\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"pichai, paperwork, patents, windows, netflix\",\n",
" \"topics\": \"company ,business ,companies ,technology ,years\"\n",
" }\n",
" },\n",
" \"17\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"extradition, lawyers, wanzhou, meng wanzhou, meng\",\n",
" \"topics\": \"china ,last ,company ,year ,market\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"year model, bezels, stylus, battery life, pixel\",\n",
" \"topics\": \"year ,last ,company ,market ,china\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"dollar, razr, price target, market value, challenging\",\n",
" \"topics\": \"market ,company ,china ,year ,last\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"15inch, release date, bezels, ereader, port\",\n",
" \"topics\": \"company ,last ,china ,year ,market\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"south african, rome, vivendi, conglomerate, genish\",\n",
" \"topics\": \"reuters ,reporting ,editing ,market ,company\"\n",
" }\n",
" },\n",
" \"18\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"gdpr, oculus, virtual reality, collins, zuckerberg facebook\",\n",
" \"topics\": \"zuckerberg ,facebook ,mark ,company ,time\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"crowder, label, infowars, wojcicki, alex jones\",\n",
" \"topics\": \"content ,company ,people ,platform ,including\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"banning, discord, milner, white supremacists, supremacists\",\n",
" \"topics\": \"twitter ,media ,social ,people ,users\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"india, trending topics, harvested, gdpr, journalists\",\n",
" \"topics\": \"facebook ,company ,people ,social ,media\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"gdpr, harvested, privacy practices, collins, browsing\",\n",
" \"topics\": \"data ,facebook ,users ,information ,companies\"\n",
" }\n",
" },\n",
" \"19\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"german, dublin, chee, brussels reuters, data united\",\n",
" \"topics\": \"reuters ,us ,european ,reporting ,commission\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"american tech, pressure, facebook amazon, privacy rules, tough\",\n",
" \"topics\": \"companies ,facebook ,tech ,european ,states\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"default, motherboard, version, actually, activities\",\n",
" \"topics\": \"internet ,users ,company ,service ,access\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"care, privacy legislation, americans, privacy rules, activities\",\n",
" \"topics\": \"data ,privacy ,information ,protection ,personal\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"gmail, american tech, dominant, google account, show\",\n",
" \"topics\": \"google ,company ,people ,years ,services\"\n",
" }\n",
" },\n",
" \"2\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"among high, middle high school students, middle high school, eliquid, schuchat\",\n",
" \"topics\": \"vaping ,ecigarettes ,products ,health ,people\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"psychosis, cells, stop smoking, pain management, psychedelics\",\n",
" \"topics\": \"study ,smoking ,found ,people ,health\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"packs, philip morris, morris international, philip morris international, youth tobacco\",\n",
" \"topics\": \"tobacco ,products ,health ,public ,reuters\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"toronto, voters, legalizing, dispensaries, legalized recreational\",\n",
" \"topics\": \"marijuana ,medical ,states ,drug ,health\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"psychedelic, psychedelics, psilocybin, clinical trials, magic\",\n",
" \"topics\": \"drug ,people ,many ,according ,first\"\n",
" }\n",
" },\n",
" \"20\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"hedge, hedge fund, lawsuit, involvement, consortium\",\n",
" \"topics\": \"company ,companies ,year ,last ,around\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"cryptography, child pornography site, pornography site, friends, influence\",\n",
" \"topics\": \"people ,used ,first ,time ,still\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"hashed, dates, warner, personal data, property\",\n",
" \"topics\": \"data ,information ,company ,users ,including\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"pornography site, child pornography site, greatest mysteries, motherboard show, time writing\",\n",
" \"topics\": \"motherboard ,according ,used ,time ,companies\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"krebs, us government, council, fancy, lawsuit\",\n",
" \"topics\": \"security ,according ,last ,including ,information\"\n",
" }\n",
" },\n",
" \"21\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"transplant, microbiome, determined, leads, parasite\",\n",
" \"topics\": \"people ,disease ,time ,might ,make\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"enjoy, crops, determined, space, angeles\",\n",
" \"topics\": \"food ,eating ,health ,year ,many\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"sprouts, purchased, recalling, people recover, food safety inspection service\",\n",
" \"topics\": \"products ,food ,cases ,last ,control\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"norovirus, chipotle, park, swimming, health department\",\n",
" \"topics\": \"people ,according ,cases ,health ,states\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"fractures, risk cancer, placebo, stress, responses\",\n",
" \"topics\": \"study ,researchers ,found ,risk ,health\"\n",
" }\n",
" },\n",
" \"22\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"tweets, savings, porn, dick, something wrong\",\n",
" \"topics\": \"feel ,really ,going ,want ,think\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"squats, getting back, tattoo, hard work, strength training\",\n",
" \"topics\": \"body ,back ,work ,know ,still\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"survival odds, recent study suggests, researchers note, montreal, percent higher\",\n",
" \"topics\": \"health ,university ,found ,many ,help\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"breakup, creativity, rhythms, time people, circadian rhythms\",\n",
" \"topics\": \"people ,time ,work ,make ,much\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"black women, died suicide, reproduction, every years, devastating\",\n",
" \"topics\": \"years ,life ,year ,first ,every\"\n",
" }\n",
" },\n",
" \"3\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"deployed, space exploration, miles hour, rover, soviet\",\n",
" \"topics\": \"year ,world ,around ,every ,according\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"table, spaces, videos, designer, many people\",\n",
" \"topics\": \"people ,make ,many ,world ,still\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"poor climate change voting record, poor climate change, climate change voting, poor climate, poor climate change voting\",\n",
" \"topics\": \"change ,many ,around ,make ,motherboard\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"descent, rover, operational, deployed, soviet\",\n",
" \"topics\": \"first ,time ,years ,much ,last\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"interstellar, miles hour, oldest, dark matter, extraterrestrial\",\n",
" \"topics\": \"research ,scientists ,years ,found ,university\"\n",
" }\n",
" },\n",
" \"4\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"falcon heavy, spacecraft, north korea, korean, blue origin\",\n",
" \"topics\": \"test ,flight ,system ,drone ,drones\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"blue origin, delays, crew dragon, engine, spacecraft\",\n",
" \"topics\": \"company ,first ,time ,year ,last\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"incident, controller, device, commercial drones, mavic\",\n",
" \"topics\": \"drones ,drone ,according ,people ,company\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"mavic, controller, smart, device, incident\",\n",
" \"topics\": \"drone ,drones ,company ,space ,first\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"spaceflight, spacecraft, launch vehicle, falcon heavy, astronauts\",\n",
" \"topics\": \"space ,test ,drone ,drones ,company\"\n",
" }\n",
" },\n",
" \"5\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"projects, verily, medical records, drugstores, regulators\",\n",
" \"topics\": \"people ,health ,services ,according ,years\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"chase, bezos, verily, jp morgan, jp\",\n",
" \"topics\": \"health ,care ,companies ,healthcare ,time\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"fiscal, cents share, cents, weight, forecast\",\n",
" \"topics\": \"company ,million ,year ,last ,first\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"medical records, cancer, researchers, ventures, glucose\",\n",
" \"topics\": \"patients ,medical ,technology ,us ,help\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"fiscal, drugstore chain, adjusted, cents, forecast\",\n",
" \"topics\": \"billion ,percent ,company ,business ,year\"\n",
" }\n",
" },\n",
" \"6\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"model production, tweeted, musk tweeted, secured, tweet\",\n",
" \"topics\": \"tesla ,company ,according ,vehicle ,editing\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"model production, settlement, quarterly, tweeted, tweet\",\n",
" \"topics\": \"company ,year ,first ,last ,make\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"energy vehicles, plugin, hybrid, producing, factories\",\n",
" \"topics\": \"electric ,vehicles ,reuters ,vehicle ,reporting\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"safety driver, testing selfdriving, aurora, taxi, selfdriving vehicle\",\n",
" \"topics\": \"selfdriving ,cars ,vehicles ,technology ,vehicle\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"texting, distracted driving, device, save, safety driver\",\n",
" \"topics\": \"driving ,technology ,according ,make ,editing\"\n",
" }\n",
" },\n",
" \"7\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"okerstrom, graves, ryan, software, electric scooter\",\n",
" \"topics\": \"company ,business ,last ,first ,time\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"passed, wage, independent contractors, drivers uber, drivers\",\n",
" \"topics\": \"drivers ,ridehailing ,including ,uber ,time\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"lime, buses, toronto, labs, software\",\n",
" \"topics\": \"public ,service ,companies ,people ,around\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"airbnb, adjusted, year earlier, went public, sales\",\n",
" \"topics\": \"million ,companies ,ridehailing ,year ,around\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"huffington, arianna, arianna huffington, ryan, graves\",\n",
" \"topics\": \"uber ,people ,ridehailing ,including ,time\"\n",
" }\n",
" },\n",
" \"8\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"assets, creating, arrangement, rural areas, competing\",\n",
" \"topics\": \"time ,people ,year ,companies ,last\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"internet users, free open internet, title, internet advocates, open internet advocates\",\n",
" \"topics\": \"internet ,companies ,users ,services ,service\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"sen, production, open internet advocates, internet advocates, film\",\n",
" \"topics\": \"content ,internet ,service ,company ,streaming\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"music streaming service, music streaming, complaint, warnermedia, debt\",\n",
" \"topics\": \"service ,streaming ,company ,services ,users\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"cricket, music streaming, film, billion year, music streaming service\",\n",
" \"topics\": \"million ,company ,year ,internet ,service\"\n",
" }\n",
" },\n",
" \"9\": {\n",
" \"topic#0\": {\n",
" \"labels\": \"sacklers, cardinal health, cardinal, amerisourcebergen, familiar\",\n",
" \"topics\": \"opioid ,health ,state ,epidemic ,crisis\"\n",
" },\n",
" \"topic#1\": {\n",
" \"labels\": \"drug overdose deaths, stable, guidelines, hydrocodone, prescription painkillers\",\n",
" \"topics\": \"opioids ,opioid ,prescription ,pain ,control\"\n",
" },\n",
" \"topic#2\": {\n",
" \"labels\": \"probe, justice department, disclosed, drugmakers, filing\",\n",
" \"topics\": \"drug ,us ,states ,drugs ,reuters\"\n",
" },\n",
" \"topic#3\": {\n",
" \"labels\": \"drug overdose deaths, white house, numbers, overdose deaths involving, heroin fentanyl\",\n",
" \"topics\": \"drug ,overdose ,deaths ,people ,fentanyl\"\n",
" },\n",
" \"topic#4\": {\n",
" \"labels\": \"stay, many people, associate, gets, addictions\",\n",
" \"topics\": \"addiction ,patients ,treatment ,people ,doctors\"\n",
" }\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Save the model_result_train dictionary to Google Drive using the pickle package.\n",
"path_file = '/content/drive/MyDrive/GLG_project/GLG_topic_model/train_doc_result.pkl'\n",
"# Use a context manager so the file handle is flushed and closed even if\n",
"# pickling raises; a bare open() here would leave the handle dangling.\n",
"with open(path_file, 'wb') as pkl_file:\n",
"    pickle.dump(model_result_train, pkl_file)"
],
"metadata": {
"id": "k_Fb9FSdFcA7"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "7QJ09aT93FIV"
},
"execution_count": null,
"outputs": []
}
]
}