Spaces:
Sleeping
Sleeping
File size: 228,044 Bytes
c2a30b3 |
1 |
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"T4"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OIZejmI9s81t","outputId":"6aa64326-493f-4512-f100-e149fc4fc044"},"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at /content/drive\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')\n","\n","path = \"/content/drive/My Drive/\""]},{"cell_type":"code","source":["!pip install underthesea"],"metadata":{"collapsed":true,"id":"5_PuAi_DEpot"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["!pip install phonlp"],"metadata":{"id":"rBDxyx1OE0hL"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Import Packages"],"metadata":{"id":"ZcxcP1wLtMOp"}},{"cell_type":"code","source":["import tensorflow as tf\n","from tensorflow.compat.v1 import ConfigProto\n","from tensorflow.compat.v1 import InteractiveSession\n","from tensorflow.keras.models import Sequential, load_model\n","from tensorflow.keras.layers import Input, Bidirectional, LSTM, Dropout, Dense\n","from tensorflow.keras.optimizers import Adam\n","from sklearn.metrics import classification_report, confusion_matrix, accuracy_score\n","from transformers import AutoModel, AutoTokenizer\n","import pandas as pd\n","import pickle\n","import numpy as np\n","import time\n","import json\n","import matplotlib.pyplot as plt\n","import seaborn as sns\n","import underthesea\n","import re\n","import phonlp\n","import torch"],"metadata":{"id":"yk_WVsSbtPDX"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Function to load models based on model_type\n","def load_models(model_type):\n","\n"," if model_type == 'bilstm_phobertbase':\n"," # Load features and tokenizer\n"," with open(path + 
'features_162k_phobertbase.pkl', 'rb') as f:\n"," data_dict = pickle.load(f)\n"," tokenizer = AutoTokenizer.from_pretrained(\"vinai/phobert-base-v2\")\n"," phobert = phobert = AutoModel.from_pretrained(\"vinai/phobert-base-v2\")\n"," max_len = 256\n"," # Load hyperparameters\n"," with open(path + 'hyperparameters/BiLSTM_phobertbase.json', 'r') as json_file:\n"," hyperparameters = json.load(json_file)\n","\n"," else:\n"," raise ValueError(\"Invalid model type specified.\")\n","\n"," return tokenizer, data_dict, hyperparameters, max_len, phobert"],"metadata":{"id":"IEJQFF3jt7iA"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Load data"],"metadata":{"id":"UTghkub8ujX0"}},{"cell_type":"code","source":["# Load model-specific data and configurations\n","tokenizer, data_dict, hyperparameters, max_len, phobert = load_models(\"bilstm_phobertbase\")"],"metadata":{"id":"1RaTIxKYukVk"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Load training, validation, and testing data\n","X_train = np.array(data_dict['X_train'])\n","X_val = np.array(data_dict['X_val'])\n","X_test = np.array(data_dict['X_test'])\n","y_train = data_dict['y_train'].values.astype(int)\n","y_val = data_dict['y_val'].values.astype(int)\n","y_test = data_dict['y_test'].values.astype(int)"],"metadata":{"id":"HvFkwujFu4QH"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["print(X_train.shape, X_test.shape, X_val.shape)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0lW1HiXUu634","outputId":"7e60f92d-823b-4050-e595-88970836877a"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["(113932, 1, 768) (24291, 1, 768) (24126, 1, 768)\n"]}]},{"cell_type":"markdown","source":["## Build Model"],"metadata":{"id":"1jHUq1kOu7_R"}},{"cell_type":"code","source":["# Function to build the BiLSTM model\n","def build_bilstm_model(X_train, y_train, lstm_units_1, lstm_units_2, dense_units, dropout_rate, 
learning_rate):\n"," model = Sequential()\n"," # Input layer with the shape based on X_train\n"," model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))\n","\n"," # First BiLSTM layer with dropout\n"," model.add(Bidirectional(LSTM(lstm_units_1, return_sequences=True)))\n"," model.add(Dropout(dropout_rate))\n","\n"," # Second BiLSTM layer with dropout\n"," model.add(Bidirectional(LSTM(lstm_units_2, return_sequences=False)))\n"," model.add(Dropout(dropout_rate))\n","\n"," # Dense layer with ReLU activation and dropout\n"," model.add(Dense(dense_units, activation='relu'))\n"," model.add(Dropout(dropout_rate))\n","\n"," # Final Dense layer with softmax activation\n"," model.add(Dense(y_train.shape[1], activation='softmax'))\n","\n"," # Adam optimizer with the specified learning rate\n"," optimizer = Adam(learning_rate=learning_rate)\n","\n"," # Compile the model with categorical crossentropy loss and accuracy metric\n"," model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])\n","\n"," return model\n","\n","# Use the hyperparameters to build the model\n","lstm_units_1 = hyperparameters['lstm_units_1']\n","lstm_units_2 = hyperparameters['lstm_units_2']\n","dense_units = hyperparameters['dense_units']\n","dropout_rate = hyperparameters['dropout_rate']\n","learning_rate = hyperparameters['learning_rate']\n","epochs = hyperparameters['epochs']\n","batch_size = hyperparameters['batch_size']"],"metadata":{"id":"YnnBzosiu9lq"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Build the BiLSTM model\n","model = build_bilstm_model(X_train, y_train, lstm_units_1, lstm_units_2, dense_units, dropout_rate, learning_rate)\n","model.summary()\n","\n","# Print model summary and save model architecture diagram\n","tf.keras.utils.plot_model(model=model, show_shapes=True, dpi=76, to_file=path + 
'bilstm_phobertbase_summary.png')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"UW101bI5vhOo","outputId":"3f97f11c-62a1-4fe8-b302-58dc9040cba7"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Model: \"sequential\"\n","_________________________________________________________________\n"," Layer (type) Output Shape Param # \n","=================================================================\n"," bidirectional (Bidirection (None, 1, 448) 1779456 \n"," al) \n"," \n"," dropout (Dropout) (None, 1, 448) 0 \n"," \n"," bidirectional_1 (Bidirecti (None, 288) 683136 \n"," onal) \n"," \n"," dropout_1 (Dropout) (None, 288) 0 \n"," \n"," dense (Dense) (None, 160) 46240 \n"," \n"," dropout_2 (Dropout) (None, 160) 0 \n"," \n"," dense_1 (Dense) (None, 13) 2093 \n"," \n","=================================================================\n","Total params: 2510925 (9.58 MB)\n","Trainable params: 2510925 (9.58 MB)\n","Non-trainable params: 0 (0.00 Byte)\n","_________________________________________________________________\n"]},{"output_type":"execute_result","data":{"image/png":"\n","text/plain":["<IPython.core.display.Image object>"]},"metadata":{},"execution_count":11}]},{"cell_type":"code","source":["# Start training time measurement\n","start_time = time.time()\n","\n","# Train the model\n","history = model.fit(\n"," X_train, y_train,\n"," epochs=epochs,\n"," batch_size=batch_size,\n"," validation_data=(X_val, y_val)\n",")\n","\n","# End training time measurement\n","end_time = time.time()\n","\n","# Calculate training time\n","training_time = end_time - start_time\n","print(f'Training time: {training_time:.2f} seconds')\n","\n","# Save training time to JSON file\n","training_time_data = {\n"," 'training_time_seconds': 
training_time\n","}"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"VkEMoJclvrP5","outputId":"9d3fb2bb-f212-42ae-8129-83d8e1a099e5"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Epoch 1/30\n","594/594 [==============================] - 17s 13ms/step - loss: 0.6482 - accuracy: 0.7974 - val_loss: 0.4288 - val_accuracy: 0.8540\n","Epoch 2/30\n","594/594 [==============================] - 6s 10ms/step - loss: 0.4604 - accuracy: 0.8516 - val_loss: 0.4039 - val_accuracy: 0.8618\n","Epoch 3/30\n","594/594 [==============================] - 6s 10ms/step - loss: 0.4353 - accuracy: 0.8582 - val_loss: 0.3959 - val_accuracy: 0.8655\n","Epoch 4/30\n","594/594 [==============================] - 5s 9ms/step - loss: 0.4166 - accuracy: 0.8632 - val_loss: 0.3809 - val_accuracy: 0.8694\n","Epoch 5/30\n","594/594 [==============================] - 7s 11ms/step - loss: 0.4020 - accuracy: 0.8670 - val_loss: 0.3759 - val_accuracy: 0.8702\n","Epoch 6/30\n","594/594 [==============================] - 5s 9ms/step - loss: 0.3850 - accuracy: 0.8706 - val_loss: 0.3698 - val_accuracy: 0.8723\n","Epoch 7/30\n","594/594 [==============================] - 6s 10ms/step - loss: 0.3773 - accuracy: 0.8741 - val_loss: 0.3666 - val_accuracy: 0.8742\n","Epoch 8/30\n","594/594 [==============================] - 6s 10ms/step - loss: 0.3642 - accuracy: 0.8777 - val_loss: 0.3549 - val_accuracy: 0.8756\n","Epoch 9/30\n","594/594 [==============================] - 6s 10ms/step - loss: 0.3564 - accuracy: 0.8795 - val_loss: 0.3592 - val_accuracy: 0.8761\n","Epoch 10/30\n","594/594 [==============================] - 6s 10ms/step - loss: 0.3444 - accuracy: 0.8827 - val_loss: 0.3551 - val_accuracy: 0.8774\n","Epoch 11/30\n","594/594 [==============================] - 6s 10ms/step - loss: 0.3372 - accuracy: 0.8845 - val_loss: 0.3508 - val_accuracy: 0.8776\n","Epoch 12/30\n","594/594 [==============================] - 7s 11ms/step - loss: 0.3289 - accuracy: 
0.8878 - val_loss: 0.3463 - val_accuracy: 0.8778\n","Epoch 13/30\n","594/594 [==============================] - 5s 9ms/step - loss: 0.3206 - accuracy: 0.8906 - val_loss: 0.3471 - val_accuracy: 0.8788\n","Epoch 14/30\n","594/594 [==============================] - 7s 12ms/step - loss: 0.3141 - accuracy: 0.8918 - val_loss: 0.3451 - val_accuracy: 0.8791\n","Epoch 15/30\n","594/594 [==============================] - 5s 9ms/step - loss: 0.3039 - accuracy: 0.8941 - val_loss: 0.3420 - val_accuracy: 0.8816\n","Epoch 16/30\n","594/594 [==============================] - 6s 10ms/step - loss: 0.2972 - accuracy: 0.8975 - val_loss: 0.3488 - val_accuracy: 0.8813\n","Epoch 17/30\n","594/594 [==============================] - 6s 10ms/step - loss: 0.2899 - accuracy: 0.8992 - val_loss: 0.3481 - val_accuracy: 0.8803\n","Epoch 18/30\n","594/594 [==============================] - 7s 11ms/step - loss: 0.2828 - accuracy: 0.9010 - val_loss: 0.3451 - val_accuracy: 0.8839\n","Epoch 19/30\n","594/594 [==============================] - 6s 11ms/step - loss: 0.2743 - accuracy: 0.9031 - val_loss: 0.3482 - val_accuracy: 0.8819\n","Epoch 20/30\n","594/594 [==============================] - 5s 9ms/step - loss: 0.2669 - accuracy: 0.9060 - val_loss: 0.3463 - val_accuracy: 0.8826\n","Epoch 21/30\n","594/594 [==============================] - 6s 11ms/step - loss: 0.2580 - accuracy: 0.9085 - val_loss: 0.3468 - val_accuracy: 0.8829\n","Epoch 22/30\n","594/594 [==============================] - 5s 9ms/step - loss: 0.2522 - accuracy: 0.9107 - val_loss: 0.3546 - val_accuracy: 0.8826\n","Epoch 23/30\n","594/594 [==============================] - 7s 11ms/step - loss: 0.2446 - accuracy: 0.9131 - val_loss: 0.3601 - val_accuracy: 0.8839\n","Epoch 24/30\n","594/594 [==============================] - 5s 9ms/step - loss: 0.2375 - accuracy: 0.9148 - val_loss: 0.3545 - val_accuracy: 0.8849\n","Epoch 25/30\n","594/594 [==============================] - 6s 10ms/step - loss: 0.2307 - accuracy: 0.9166 - val_loss: 0.3692 - 
# %%
# Recorded end of the training log for this run:
#   Epoch 30/30 - loss: 0.1998 - accuracy: 0.9276 - val_loss: 0.3860 - val_accuracy: 0.8854
#   Training time: 192.62 seconds
#
# Persist the trained model next to the other artifacts under `path`.
# NOTE(review): the recorded run emitted a Keras UserWarning that HDF5 (`.h5`) is a
# legacy format and recommends the native `.keras` format. The `.h5` name is kept
# here because other code may load this exact file; confirm before migrating.
model.save(path + 'bilstm_phobertbase.h5')
# %% [markdown]
# ## Evaluate

# %%
# Define class names
class_names = ['Cong nghe', 'Doi song', 'Giai tri', 'Giao duc', 'Khoa hoc', 'Kinh te',
               'Nha dat', 'Phap luat', 'The gioi', 'The thao', 'Van hoa', 'Xa hoi', 'Xe co']

# Define evaluation function
def evaluate_model(model, X_test, y_test, class_names):
    """Score ``model`` on a held-out split.

    Parameters
    ----------
    model : object
        Fitted model whose ``predict(X_test)`` returns one score per class and row.
    X_test : array
        Features passed unchanged to ``model.predict``.
    y_test : array
        Per-class label matrix; the true class is taken as ``argmax`` over axis 1.
    class_names : list of str
        Display names used as ``target_names`` in the classification report.

    Returns
    -------
    tuple
        ``(conf_matrix, report_df)``: the confusion matrix, and the classification
        report as a DataFrame with one row per class plus the summary rows.
    """
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_test, axis=1)

    conf_matrix = confusion_matrix(y_true, y_pred_classes)

    # Build the report once, as a dict, and convert it to a DataFrame. The original
    # also computed a text report and a separate accuracy score that were never
    # used; the accuracy is already a row of this report.
    report_dict = classification_report(y_true, y_pred_classes, target_names=class_names, output_dict=True)
    report_df = pd.DataFrame(report_dict).transpose()

    return conf_matrix, report_df

# %%
# Evaluate the model on test data
conf_matrix, report_df = evaluate_model(model, X_test, y_test, class_names)

# %%
report_df
0.913514 0.909269 2035.000000\n","Giao duc 0.915953 0.916935 0.916443 1866.000000\n","Khoa hoc 0.894380 0.864232 0.879048 2136.000000\n","Kinh te 0.868528 0.842442 0.855286 2031.000000\n","Nha dat 0.860921 0.891828 0.876102 2117.000000\n","Phap luat 0.878977 0.843208 0.860721 1671.000000\n","The gioi 0.910120 0.902310 0.906198 1515.000000\n","The thao 0.964247 0.970556 0.967391 1834.000000\n","Van hoa 0.818491 0.796797 0.807499 1811.000000\n","Xa hoi 0.808928 0.802809 0.805857 1851.000000\n","Xe co 0.943614 0.956522 0.950024 2047.000000\n","accuracy 0.884607 0.884607 0.884607 0.884607\n","macro avg 0.884228 0.884236 0.884045 24291.000000\n","weighted avg 0.884728 0.884607 0.884486 24291.000000"],"text/html":["\n"," <div id=\"df-2559f341-03b8-4637-8c1a-404c125da91d\" class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>precision</th>\n"," <th>recall</th>\n"," <th>f1-score</th>\n"," <th>support</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>Cong nghe</th>\n"," <td>0.916376</td>\n"," <td>0.924971</td>\n"," <td>0.920653</td>\n"," <td>1706.000000</td>\n"," </tr>\n"," <tr>\n"," <th>Doi song</th>\n"," <td>0.809365</td>\n"," <td>0.868941</td>\n"," <td>0.838095</td>\n"," <td>1671.000000</td>\n"," </tr>\n"," <tr>\n"," <th>Giai tri</th>\n"," <td>0.905063</td>\n"," <td>0.913514</td>\n"," <td>0.909269</td>\n"," <td>2035.000000</td>\n"," </tr>\n"," <tr>\n"," <th>Giao duc</th>\n"," <td>0.915953</td>\n"," <td>0.916935</td>\n"," <td>0.916443</td>\n"," <td>1866.000000</td>\n"," </tr>\n"," <tr>\n"," <th>Khoa hoc</th>\n"," <td>0.894380</td>\n"," <td>0.864232</td>\n"," <td>0.879048</td>\n"," <td>2136.000000</td>\n"," 
</tr>\n"," <tr>\n"," <th>Kinh te</th>\n"," <td>0.868528</td>\n"," <td>0.842442</td>\n"," <td>0.855286</td>\n"," <td>2031.000000</td>\n"," </tr>\n"," <tr>\n"," <th>Nha dat</th>\n"," <td>0.860921</td>\n"," <td>0.891828</td>\n"," <td>0.876102</td>\n"," <td>2117.000000</td>\n"," </tr>\n"," <tr>\n"," <th>Phap luat</th>\n"," <td>0.878977</td>\n"," <td>0.843208</td>\n"," <td>0.860721</td>\n"," <td>1671.000000</td>\n"," </tr>\n"," <tr>\n"," <th>The gioi</th>\n"," <td>0.910120</td>\n"," <td>0.902310</td>\n"," <td>0.906198</td>\n"," <td>1515.000000</td>\n"," </tr>\n"," <tr>\n"," <th>The thao</th>\n"," <td>0.964247</td>\n"," <td>0.970556</td>\n"," <td>0.967391</td>\n"," <td>1834.000000</td>\n"," </tr>\n"," <tr>\n"," <th>Van hoa</th>\n"," <td>0.818491</td>\n"," <td>0.796797</td>\n"," <td>0.807499</td>\n"," <td>1811.000000</td>\n"," </tr>\n"," <tr>\n"," <th>Xa hoi</th>\n"," <td>0.808928</td>\n"," <td>0.802809</td>\n"," <td>0.805857</td>\n"," <td>1851.000000</td>\n"," </tr>\n"," <tr>\n"," <th>Xe co</th>\n"," <td>0.943614</td>\n"," <td>0.956522</td>\n"," <td>0.950024</td>\n"," <td>2047.000000</td>\n"," </tr>\n"," <tr>\n"," <th>accuracy</th>\n"," <td>0.884607</td>\n"," <td>0.884607</td>\n"," <td>0.884607</td>\n"," <td>0.884607</td>\n"," </tr>\n"," <tr>\n"," <th>macro avg</th>\n"," <td>0.884228</td>\n"," <td>0.884236</td>\n"," <td>0.884045</td>\n"," <td>24291.000000</td>\n"," </tr>\n"," <tr>\n"," <th>weighted avg</th>\n"," <td>0.884728</td>\n"," <td>0.884607</td>\n"," <td>0.884486</td>\n"," <td>24291.000000</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <div class=\"colab-df-buttons\">\n","\n"," <div class=\"colab-df-container\">\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-2559f341-03b8-4637-8c1a-404c125da91d')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n"," <path 
d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n"," </svg>\n"," </button>\n","\n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," .colab-df-buttons div {\n"," margin-bottom: 4px;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-2559f341-03b8-4637-8c1a-404c125da91d button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-2559f341-03b8-4637-8c1a-404c125da91d');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? 
Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n","\n","\n","<div id=\"df-131d747d-9f3e-4cd8-9875-f4ffa962785c\">\n"," <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-131d747d-9f3e-4cd8-9875-f4ffa962785c')\"\n"," title=\"Suggest charts\"\n"," style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <g>\n"," <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n"," </g>\n","</svg>\n"," </button>\n","\n","<style>\n"," .colab-df-quickchart {\n"," --bg-color: #E8F0FE;\n"," --fill-color: #1967D2;\n"," --hover-bg-color: #E2EBFA;\n"," --hover-fill-color: #174EA6;\n"," --disabled-fill-color: #AAA;\n"," --disabled-bg-color: #DDD;\n"," }\n","\n"," [theme=dark] .colab-df-quickchart {\n"," --bg-color: #3B4455;\n"," --fill-color: #D2E3FC;\n"," --hover-bg-color: #434B5C;\n"," --hover-fill-color: #FFFFFF;\n"," --disabled-bg-color: #3B4455;\n"," --disabled-fill-color: #666;\n"," }\n","\n"," .colab-df-quickchart {\n"," background-color: var(--bg-color);\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: var(--fill-color);\n"," height: 32px;\n"," padding: 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-quickchart:hover {\n"," background-color: var(--hover-bg-color);\n"," box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: var(--button-hover-fill-color);\n"," }\n","\n"," 
.colab-df-quickchart-complete:disabled,\n"," .colab-df-quickchart-complete:disabled:hover {\n"," background-color: var(--disabled-bg-color);\n"," fill: var(--disabled-fill-color);\n"," box-shadow: none;\n"," }\n","\n"," .colab-df-spinner {\n"," border: 2px solid var(--fill-color);\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," animation:\n"," spin 1s steps(1) infinite;\n"," }\n","\n"," @keyframes spin {\n"," 0% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," border-left-color: var(--fill-color);\n"," }\n"," 20% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 30% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," border-right-color: var(--fill-color);\n"," }\n"," 40% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 60% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," }\n"," 80% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-bottom-color: var(--fill-color);\n"," }\n"," 90% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," }\n"," }\n","</style>\n","\n"," <script>\n"," async function quickchart(key) {\n"," const quickchartButtonEl =\n"," document.querySelector('#' + key + ' button');\n"," quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n"," quickchartButtonEl.classList.add('colab-df-spinner');\n"," try {\n"," const charts = await google.colab.kernel.invokeFunction(\n"," 'suggestCharts', [key], {});\n"," } catch (error) {\n"," console.error('Error during call to suggestCharts:', error);\n"," }\n"," quickchartButtonEl.classList.remove('colab-df-spinner');\n"," quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n"," }\n"," 
(() => {\n"," let quickchartButtonEl =\n"," document.querySelector('#df-131d747d-9f3e-4cd8-9875-f4ffa962785c button');\n"," quickchartButtonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n"," })();\n"," </script>\n","</div>\n","\n"," <div id=\"id_2d801675-05fe-4fbe-b0c6-65513409c560\">\n"," <style>\n"," .colab-df-generate {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-generate:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-generate {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-generate:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n"," <button class=\"colab-df-generate\" onclick=\"generateWithVariable('report_df')\"\n"," title=\"Generate code using this dataframe.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n"," </svg>\n"," </button>\n"," <script>\n"," (() => {\n"," const buttonEl =\n"," document.querySelector('#id_2d801675-05fe-4fbe-b0c6-65513409c560 button.colab-df-generate');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 
# %%
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names, ax=ax)

# Set the title and the axis labels
ax.set_title('Confusion Matrix for BiLSTM_PhoBert')
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')

fig.savefig(path + 'confusion_matrix_bilstm_phobertbase.png')

plt.show()

# %%
report_df.to_csv(path + 'classification_report_bilstm_phobertbase.csv', index=True)

# %% [markdown]
# ## Predict

# %%
nlp_model = phonlp.load(save_dir=path + "phonlp")

# Function to preprocess text
def preprocess_text(text):
    """Clean, word-segment and tag-filter raw text into a lowercase token string.

    Steps: strip every character that is not a word character, whitespace or
    ``.``; split into sentences; word-segment each sentence; annotate it with
    ``nlp_model``; drop tokens whose tag is ``M``, ``X`` or ``CH``; lowercase
    the rest and join everything with single spaces.

    Parameters
    ----------
    text : str
        Raw input text.

    Returns
    -------
    str
        Space-joined surviving tokens. Sentences that fail to process are
        skipped with a warning, so the result may be shorter than the input.
    """
    import warnings  # local import: the notebook's import cell does not provide it

    excluded_tags = ['M', 'X', 'CH']
    excluded_words = ["'", ","]

    text = re.sub(r'[^\w\s.]', '', text)
    sentences = underthesea.sent_tokenize(text)
    preprocessed_words = []
    for sentence in sentences:
        try:
            word_tokens = underthesea.word_tokenize(sentence, format="text")
            tags = nlp_model.annotate(word_tokens, batch_size=64)
            # presumably tags[0][0] holds the tokens and tags[1][0] the per-token tag
            # entries whose first element is the POS label - TODO confirm against phonlp.
            filtered_words = [
                word.lower()
                for word, tag in zip(tags[0][0], tags[1][0])
                if tag[0] not in excluded_tags and word not in excluded_words
            ]
            preprocessed_words.extend(filtered_words)
        except Exception as exc:
            # Stay best-effort (one bad sentence must not abort the prediction),
            # but report the failure instead of dropping the sentence silently.
            warnings.warn(f"preprocess_text: skipped a sentence that could not be processed ({exc!r})")
    return ' '.join(preprocessed_words)

# Function to create BERT features
def make_bert_features(v_text, max_len, text_tokenizer=None, encoder=None):
    """Encode texts and return the encoder's first-token hidden state for each one.

    Parameters
    ----------
    v_text : iterable of str
        Texts to encode.
    max_len : int
        Fixed sequence length; shorter sequences are right-padded with the pad
        id and longer ones are cut to ``max_len`` ids.
    text_tokenizer : optional
        Tokenizer to use. Defaults to the notebook-level ``tokenizer``.
    encoder : optional
        Encoder model to use. Defaults to the notebook-level ``phobert``.

    Returns
    -------
    numpy.ndarray
        One row per input text, taken from position 0 of the first element of
        the encoder output.
    """
    tok = tokenizer if text_tokenizer is None else text_tokenizer
    enc = phobert if encoder is None else encoder

    # Use the tokenizer's own pad id; fall back to 1 (the value the original
    # hard-coded) when the tokenizer does not define one.
    pad_id = getattr(tok, "pad_token_id", None)
    if pad_id is None:
        pad_id = 1

    # NOTE(review): truncation=True without max_length relies on the tokenizer's
    # configured maximum; the slice below enforces max_len regardless.
    v_tokenized = [tok.encode(i_text, truncation=True) for i_text in v_text]

    padded = np.full((len(v_tokenized), max_len), pad_id, dtype=np.int64)
    # Mask by true length rather than by "id == pad id", so a genuine token that
    # happens to share the pad id is not masked out.
    attention_mask = np.zeros((len(v_tokenized), max_len), dtype=np.int64)
    for row, ids in enumerate(v_tokenized):
        kept = ids[:max_len]
        padded[row, :len(kept)] = kept
        attention_mask[row, :len(kept)] = 1

    input_ids = torch.tensor(padded).to(torch.long)
    attention_mask = torch.tensor(attention_mask)
    with torch.no_grad():
        last_hidden_states = enc(input_ids=input_ids, attention_mask=attention_mask)
    v_features = last_hidden_states[0][:, 0, :].numpy()
    return v_features

def predict_label(text, tokenizer, phobert, model, class_names, max_len):
    """Predict the class of one raw text and return the per-class confidences.

    Parameters
    ----------
    text : str
        Raw input text; it is passed through ``preprocess_text`` first.
    tokenizer, phobert :
        Tokenizer and encoder used to build the features. The original ignored
        these and used the notebook globals; they are now passed through.
    model :
        Fitted classifier whose ``predict`` returns one score per class.
    class_names : list of str
        Class names indexed by the model's output position.
    max_len : int
        Sequence length passed to ``make_bert_features``.

    Returns
    -------
    tuple
        ``(predicted_label, confidence_df)``: the best class name, and a
        one-row DataFrame with one column per class.
    """
    text = preprocess_text(text)
    # Encode text using BERT tokenizer and create BERT features
    encoded_text = make_bert_features([text], max_len, text_tokenizer=tokenizer, encoder=phobert)
    encoded_text = np.expand_dims(encoded_text, axis=1)  # Add the length-1 sequence axis the BiLSTM expects

    # Predict probabilities
    prediction = model.predict(encoded_text)

    # Get predicted label
    predicted_label_index = np.argmax(prediction, axis=1)[0]
    predicted_label = class_names[predicted_label_index]

    # Create confidence DataFrame
    confidences = {class_names[i]: float(prediction[0][i]) for i in range(len(prediction[0]))}
    confidence_df = pd.DataFrame(confidences, index=[0])

    return predicted_label, confidence_df

# %%
text = """
Dự án Trung tâm Nghiên cứu khoa học công nghệ hạt nhân (CNST) được thực hiện theo Hiệp định Liên Chính phủ ký năm 2011. Dự án đã được Chính phủ Việt Nam phê duyệt chủ trương đầu tư năm 2018. CNST dự kiến đặt tại TP Long Khánh, Đồng Nai. Trung tâm này sẽ có lò phản ứng hạt nhân dạng bể, công suất 10 MW, sử dụng nhiên liệu độ giàu thấp do Nga chế tạo. CNST tập trung lĩnh vực vật liệu chiếu xạ, khoa học sinh học, đồng vị phóng xạ, kỹ thuật lò phản ứng, an toàn bức xạ; nghiên cứu điều chế dược chất mới trong điều trị ung thư, nghiên cứu chiếu xạ silic - vật liệu bán dẫn, tán xạ góc nhỏ...

Ông Trần Chí Thành, Viện trưởng Viện Năng lượng nguyên tử (Bộ Khoa học và Công nghệ) cho biết "đây là lần đầu tiên Việt Nam triển khai một dự án về xây dựng lò phản ứng hạt nhân nghiên cứu công suất lớn".

Theo ông Thành, để triển khai Dự án CNST, Bộ Khoa học và Công nghệ đã có những phương án chuẩn bị nguồn nhân lực quản lý và triển khai ở các giai đoạn khác nhau. Bộ cũng đưa ra kế hoạch chuẩn bị nguồn nhân lực cho vận hành đảm bảo an toàn, khai thác hiệu quả Trung tâm sau khi đi vào hoạt động.

Để hỗ trợ thẩm tra, thẩm định Báo cáo nghiên cứu khả thi, Báo cáo phân tích an toàn và hồ sơ thiết kế, Bộ Khoa học và Công nghệ đề nghị Tập đoàn Năng lượng Nguyên tử Quốc gia Liên bang Nga (Rosatom) tạo điều kiện cho một số cán bộ Việt Nam tham gia thực hiện thiết kế cơ sở của lò phản ứng và các tính toán, phân tích an toàn đi kèm. Rosatom cũng giúp Việt Nam trong đào tạo cán bộ vận hành lò phản ứng nghiên cứu.

Viện Năng lượng nguyên tử Việt Nam cũng xây dựng các nhóm chuyên môn sâu về vật lý lò, thiết kế sử dụng kênh ngang, sản xuất đồng vị phóng xạ trên lò nghiên cứu, nghiên cứu vật liệu, chiếu xạ silic làm bán dẫn, nghiên cứu phân tích kích hoạt, bảo vệ môi trường, an toàn hạt nhân. Điều này nhằm xây dựng nguồn cán bộ nghiên cứu, ứng dụng khai thác hiệu quả lò nghiên cứu mới, đảm bảo an toàn khi CNST đi vào hoạt động

Trước đó tháng 10/2017, Viện ký thỏa thuận hợp tác với Trường Đại học nghiên cứu Bách khoa Tomsk và Đại học Nghiên cứu Hạt nhân Quốc gia Nga (MEPhI) vào tháng 12/2023, về hợp tác nghiên cứu và đào tạo cán bộ trong các lĩnh vực năng lượng nguyên tử có liên quan.

Viện trưởng Trần Chí Thành cho biết thêm, trước mắt Việt Nam và Nga sẽ tập trung đẩy mạnh triển khai thực hiện Dự án đảm bảo đúng tiến độ, hiệu quả, tuân thủ các quy định của Cơ quan Năng lượng nguyên tử quốc tế (IAEA).

Theo Quy hoạch phát triển, ứng dụng năng lượng nguyên tử giai đoạn 2021 - 2030, tầm nhìn 2050, hướng nghiên cứu ứng dụng năng lượng nguyên tử sẽ tập trung cả khoa học cơ bản (vật lý hạt nhân, vật lý lò, an toàn và thủy nhiệt, tự động điều khiển, vật liệu, hóa học ...) và ứng dụng trong y tế (y học bức xạ) nông nghiệp; công nghiệp; tài nguyên môi trường (nước ngầm, ô nhiễm, phát tán phóng xạ, xói mòn đất, chất thải phóng xạ, đuôi quặng)...

Ngoài ra, trong quy hoạch phát triển ứng dụng năng lượng nguyên tử giai đoạn tới sẽ nghiên cứu tiền khả thi dự án xây dựng tổ hợp máy gia tốc lớn đặt tại miền Bắc, xây dựng các phòng thí nghiệm công nghệ và an toàn hạt nhân...
"""

predicted_label, confidences = predict_label(text, tokenizer, phobert, model, class_names, max_len)
print(f"The predicted label for the new text is: {predicted_label}")
print("Confidence scores for each label:")
confidences
29.64it/s]\n","100%|██████████| 1/1 [00:00<00:00, 22.67it/s]\n","100%|██████████| 1/1 [00:00<00:00, 25.53it/s]\n","100%|██████████| 1/1 [00:00<00:00, 27.02it/s]\n","100%|██████████| 1/1 [00:00<00:00, 25.41it/s]\n","100%|██████████| 1/1 [00:00<00:00, 21.28it/s]\n","100%|██████████| 1/1 [00:00<00:00, 28.80it/s]\n","100%|██████████| 1/1 [00:00<00:00, 24.91it/s]\n","100%|██████████| 1/1 [00:00<00:00, 27.44it/s]\n","100%|██████████| 1/1 [00:00<00:00, 23.32it/s]\n","Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n"]},{"output_type":"stream","name":"stdout","text":["1/1 [==============================] - 0s 63ms/step\n","The predicted label for the new text is: Khoa hoc\n","Confidence scores for each label:\n"]},{"output_type":"execute_result","data":{"text/plain":[" Cong nghe Doi song Giai tri Giao duc Khoa hoc Kinh te Nha dat \\\n","0 0.000723 0.025548 0.000074 0.00598 0.956909 0.000189 0.001694 \n","\n"," Phap luat The gioi The thao Van hoa Xa hoi Xe co \n","0 0.000049 0.00043 0.000061 0.000091 0.001248 0.007005 "],"text/html":["\n"," <div id=\"df-884817c4-8881-4c91-95cd-fc0f2982595c\" class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Cong nghe</th>\n"," <th>Doi song</th>\n"," <th>Giai tri</th>\n"," <th>Giao duc</th>\n"," <th>Khoa hoc</th>\n"," <th>Kinh te</th>\n"," <th>Nha dat</th>\n"," <th>Phap luat</th>\n"," <th>The gioi</th>\n"," <th>The thao</th>\n"," <th>Van hoa</th>\n"," <th>Xa hoi</th>\n"," <th>Xe co</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>0.000723</td>\n"," <td>0.025548</td>\n"," 
<td>0.000074</td>\n"," <td>0.00598</td>\n"," <td>0.956909</td>\n"," <td>0.000189</td>\n"," <td>0.001694</td>\n"," <td>0.000049</td>\n"," <td>0.00043</td>\n"," <td>0.000061</td>\n"," <td>0.000091</td>\n"," <td>0.001248</td>\n"," <td>0.007005</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <div class=\"colab-df-buttons\">\n","\n"," <div class=\"colab-df-container\">\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-884817c4-8881-4c91-95cd-fc0f2982595c')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n"," <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n"," </svg>\n"," </button>\n","\n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," .colab-df-buttons div {\n"," margin-bottom: 4px;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-884817c4-8881-4c91-95cd-fc0f2982595c button.colab-df-convert');\n"," 
buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-884817c4-8881-4c91-95cd-fc0f2982595c');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n","\n","\n"," <div id=\"id_1317d2a4-7b3b-4faa-b58d-ff29dfba7d6f\">\n"," <style>\n"," .colab-df-generate {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-generate:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-generate {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-generate:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n"," <button class=\"colab-df-generate\" onclick=\"generateWithVariable('confidences')\"\n"," title=\"Generate code using this dataframe.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 
24\"\n"," width=\"24px\">\n"," <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n"," </svg>\n"," </button>\n"," <script>\n"," (() => {\n"," const buttonEl =\n"," document.querySelector('#id_1317d2a4-7b3b-4faa-b58d-ff29dfba7d6f button.colab-df-generate');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," buttonEl.onclick = () => {\n"," google.colab.notebook.generateWithVariable('confidences');\n"," }\n"," })();\n"," </script>\n"," </div>\n","\n"," </div>\n"," </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"confidences","summary":"{\n \"name\": \"confidences\",\n \"rows\": 1,\n \"fields\": [\n {\n \"column\": \"Cong nghe\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.000722816155757755,\n \"max\": 0.000722816155757755,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.000722816155757755\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Doi song\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.0255478136241436,\n \"max\": 0.0255478136241436,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.0255478136241436\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Giai tri\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 7.399600144708529e-05,\n \"max\": 7.399600144708529e-05,\n \"num_unique_values\": 1,\n \"samples\": [\n 7.399600144708529e-05\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Giao duc\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.00598022760823369,\n 
\"max\": 0.00598022760823369,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.00598022760823369\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Khoa hoc\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.9569094777107239,\n \"max\": 0.9569094777107239,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.9569094777107239\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Kinh te\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.00018913984240498394,\n \"max\": 0.00018913984240498394,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.00018913984240498394\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Nha dat\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.00169356819242239,\n \"max\": 0.00169356819242239,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.00169356819242239\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Phap luat\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 4.861697743763216e-05,\n \"max\": 4.861697743763216e-05,\n \"num_unique_values\": 1,\n \"samples\": [\n 4.861697743763216e-05\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"The gioi\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.0004296234983485192,\n \"max\": 0.0004296234983485192,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.0004296234983485192\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"The thao\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 6.1426566389855e-05,\n \"max\": 6.1426566389855e-05,\n \"num_unique_values\": 1,\n \"samples\": [\n 6.1426566389855e-05\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Van hoa\",\n \"properties\": {\n \"dtype\": \"number\",\n 
\"std\": null,\n \"min\": 9.067390783457085e-05,\n \"max\": 9.067390783457085e-05,\n \"num_unique_values\": 1,\n \"samples\": [\n 9.067390783457085e-05\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Xa hoi\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.001247699954546988,\n \"max\": 0.001247699954546988,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.001247699954546988\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Xe co\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.007004955783486366,\n \"max\": 0.007004955783486366,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.007004955783486366\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"}},"metadata":{},"execution_count":20}]}]} |