{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Sentiment Analysis" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Imports, constants and setup" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import time\n", "import tracemalloc\n", "import warnings\n", "from collections import Counter\n", "\n", "import joblib\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier, VotingClassifier\n", "from sklearn.exceptions import ConvergenceWarning\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score, roc_auc_score\n", "from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold, train_test_split\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.neural_network import MLPClassifier\n", "from sklearn.svm import SVC\n", "from sklearn.tree import DecisionTreeClassifier\n", "from tqdm.notebook import tqdm\n", "from wordcloud import WordCloud\n", "\n", "from app.constants import CACHE_DIR, DATA_DIR\n", "from app.data import load_data, tokenize\n", "from app.model import _get_vectorizer" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "tqdm.pandas()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "SEED = 42\n", "CACHE = joblib.Memory(CACHE_DIR, verbose=0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data loading" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textsentiment
0The impact of educational reforms remains unce...1
1Critics argue that recent improvements in the ...0
2Innovative teaching methods have led to unexpe...1
3Despite budget constraints, the school has man...1
4The true effectiveness of online learning plat...0
\n", "
" ], "text/plain": [ " text sentiment\n", "0 The impact of educational reforms remains unce... 1\n", "1 Critics argue that recent improvements in the ... 0\n", "2 Innovative teaching methods have led to unexpe... 1\n", "3 Despite budget constraints, the school has man... 1\n", "4 The true effectiveness of online learning plat... 0" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read the raw texts with their labels and pair them up as DataFrame columns\n", "text_data, label_data = load_data(\"test\")\n", "dataset = pd.DataFrame(dict(text=text_data, sentiment=label_data))\n", "dataset.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Cleaning: 100%|██████████| 209/209 [00:01<00:00, 119.42doc/s]\n", "Lemmatization: 100%|██████████| 209/209 [00:00<00:00, 395.78doc/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textsentimenttokens
0The impact of educational reforms remains unce...1impact educational reform remain uncertain des...
1Critics argue that recent improvements in the ...0critic argue recent improvement school system ...
2Innovative teaching methods have led to unexpe...1innovative teaching method lead unexpected cha...
3Despite budget constraints, the school has man...1despite budget constraint school manage mainta...
4The true effectiveness of online learning plat...0true effectiveness online learning platform ma...
\n", "
" ], "text/plain": [ " text sentiment \\\n", "0 The impact of educational reforms remains unce... 1 \n", "1 Critics argue that recent improvements in the ... 0 \n", "2 Innovative teaching methods have led to unexpe... 1 \n", "3 Despite budget constraints, the school has man... 1 \n", "4 The true effectiveness of online learning plat... 0 \n", "\n", " tokens \n", "0 impact educational reform remain uncertain des... \n", "1 critic argue recent improvement school system ... \n", "2 innovative teaching method lead unexpected cha... \n", "3 despite budget constraint school manage mainta... \n", "4 true effectiveness online learning platform ma... " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Tokenize every text, then keep each document's tokens as one space-joined string\n", "tokens = tokenize(\n", "    dataset[\"text\"].tolist(),\n", "    batch_size=1024,\n", "    n_jobs=2,\n", "    show_progress=True,\n", ")\n", "dataset[\"tokens\"] = tokens.apply(\" \".join)\n", "dataset.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data exploration" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Sentiment distribution" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAhwAAAFzCAYAAAB1tNBuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAlWUlEQVR4nO3de1zUdb7H8TfIXWC8NkCLQua1TDNbg9paFUNze+jGo3KzUtfUTDQ1142zqdWpUE8pR2Ol9ux62aPHLmdzzYxSvFWSF6zMMrVW01UumcIIBiLzPX90nHUSLJGvA/h6Ph7zeDS/Gx94+MuXP34z42eMMQIAALDI39cDAACAxo/gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUBvh6gPnC73Tpy5IgiIiLk5+fn63EAAGgwjDE6ceKEYmJi5O9f83UMgkPSkSNHFBsb6+sxAABosA4dOqSf/exnNa4nOCRFRERI+v6HFRkZ6eNpAABoOFwul2JjYz1/l9aE4JA8v0aJjIwkOAAAqIUfuyWBm0YBAIB1BAcAALCO4AAAANYRHAAAwDqCAwAAWEdwAAAA6wgOAABgHcEBAACsIzgAAIB1BAcAALCO4AAAANbxWSqXmbjH3/L1CKhDB2YO9PUIAPCTcIUDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHU+DY5NmzbpzjvvVExMjPz8/LRixQqv9cYYTZ8+XdHR0QoNDVVSUpL27dvntc2xY8c0dOhQRUZGqlmzZho5cqRKS0sv4XcBAAB+jE+Do6ysTN26dVNmZma162fPnq158+YpKytLW7ZsUdOmTZWcnKzy8nLPNkOHDtVnn32mNWvWaNWqVdq0aZNGjx59qb4FAADwEwT48osPGDBAAwYMqHadMUYZGRl64oknNGjQIEnSkiVL5HQ6tWLFCg0ZMkS7d+9Wdna2tm3bpp49e0qS5s+frzvuuEPPP/+8YmJiLtn3AgAAalZv7+HYv3+/CgoKlJSU5FnmcDjUq1cv5ebmSpJyc3PVrFkzT2xIUlJSkvz9/bVly5Yaj11RUSGXy+X1AAAA9tTb4CgoKJAkOZ1Or+VOp9OzrqCgQFdccYXX+oCAALVo0cKzTXXS09PlcDg8j9jY2DqeHgAAnK3eBodNaWlpKikp8TwOHTrk65EAAGjU6m1wREVFSZIKCwu9lhcWFnrWRUVFqaioyGv96dOndezYMc821QkODlZkZKTXAwAA2FNvgyM+Pl5RUVHKycnxLHO5XNqyZYsSEhIkSQkJCSouLlZeXp5nm3Xr1sntdqtXr16XfGYAAFA9n75KpbS0VF9++aXn+f79+/Xxxx+rRYsWatOmjSZOnKhnnnlG7du3V3x8vKZNm6aYmBgNHjxYktS5c2f1799fo0aNUlZWliorK5WamqohQ4bwChUAAOoRnwbH9u3b1bt3b8/zyZMnS5KGDRumRYsWaerUqSorK9Po0aNVXFysW265RdnZ2QoJCfHss3TpUqWmpqpv377y9/dXSkqK5s2bd8m/FwAAUDM/Y4zx9RC+5nK55HA4VFJS0ujv54h7/C1fj4A6dGDmQF+PAOAy91P/Dq2393AAAIDGg+AAAADWERwAAMA6ggMAAFjn01epAAD+hZu6Gx9u7P4XrnAAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwA
AsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHX1Ojiqqqo0bdo0xcfHKzQ0VO3atdO///u/yxjj2cYYo+nTpys6OlqhoaFKSkrSvn37fDg1AAD4oXodHLNmzdKCBQv04osvavfu3Zo1a5Zmz56t+fPne7aZPXu25s2bp6ysLG3ZskVNmzZVcnKyysvLfTg5AAA4W4CvBzifzZs3a9CgQRo4cKAkKS4uTv/zP/+jrVu3Svr+6kZGRoaeeOIJDRo0SJK0ZMkSOZ1OrVixQkOGDPHZ7AAA4F/q9RWOxMRE5eTkaO/evZKkTz75RO+//74GDBggSdq/f78KCgqUlJTk2cfhcKhXr17Kzc2t8bgVFRVyuVxeDwAAYE+9vsLx+OOPy+VyqVOnTmrSpImqqqr07LPPaujQoZKkgoICSZLT6fTaz+l0etZVJz09XU899ZS9wQEAgJd6fYXj1Vdf1dKlS7Vs2TLt2LFDixcv1vPPP6/Fixdf1HHT0tJUUlLieRw6dKiOJgYAANWp11c4fve73+nxxx/33IvRtWtXff3110pPT9ewYcMUFRUlSSosLFR0dLRnv8LCQnXv3r3G4wYHBys4ONjq7AAA4F/q9RWOkydPyt/fe8QmTZrI7XZLkuLj4xUVFaWcnBzPepfLpS1btighIeGSzgoAAGpWr69w3HnnnXr22WfVpk0bXXPNNfroo480Z84c/fa3v5Uk+fn5aeLEiXrmmWfUvn17xcfHa9q0aYqJidHgwYN9OzwAAPCo18Exf/58TZs2TY888oiKiooUExOjMWPGaPr06Z5tpk6dqrKyMo0ePVrFxcW65ZZblJ2drZCQEB9ODgAAzuZnzn7bzsuUy+WSw+FQSUmJIiMjfT2OVXGPv+XrEVCHDswc6OsRUIc4Pxufy+Ec/al/h9brezgAAEDjQHAAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHW1Co6rrrpK33777TnLi4uLddVVV130UAAAoHGpVXAcOHBAVVVV5yyvqKjQ4cOHL3ooAADQuARcyMYrV670/Pc777wjh8PheV5VVaWcnBzFxcXV2XAAAKBxuKDgGDx4sCTJz89Pw4YN81oXGBiouLg4vfDCC3U2HAAAaBwuKDjcbrckKT4+Xtu2bVOrVq2sDAUAABqXCwqOM/bv31/XcwAAgEasVsEhSTk5OcrJyVFRUZHnyscZf/nLXy56MAAA0HjUKjieeuopPf300+rZs6eio6Pl5+dX13MBAIBGpFbBkZWVpUWLFumBBx6o63kAAEAjVKv34Th16pQSExPrehYAANBI1So4HnroIS1btqyuZwEAAI1UrX6lUl5erpdffllr167Vddddp8DAQK/1c+bMqZPhAABA41CrKxw7d+5U9+7d5e/vr127dumjjz7yPD7++OM6HfDw4cO6//771bJlS4WGhqpr167avn27Z70xRtOnT1d0dLRCQ0OVlJSkffv21ekMAADg4tTqCsf69evreo5qHT9+XDfffLN69+6tt99+W61bt9a+ffvUvHlzzzazZ8/WvHnztHjxYsXHx2vatGlKTk7W559/rpCQkEsyJwAAOL9avw/HpTBr1izFxsZq4cKFnmXx8fGe/zbGKCMjQ0888YQGDRokSVqyZImcTqdWrFihIUOGXPKZAQDAuWoVHL179z7ve2+sW7eu1gOdbeXKlUpOTtbdd9+tjRs36sorr9QjjzyiUaNGSfr+HU8LCgqUlJTk2cfhcKh
Xr17Kzc2tMTgqKipUUVHhee5yuepkXgAAUL1a3cPRvXt3devWzfPo0qWLTp06pR07dqhr1651Ntw//vEPLViwQO3bt9c777yjsWPHasKECVq8eLEkqaCgQJLkdDq99nM6nZ511UlPT5fD4fA8YmNj62xmAABwrlpd4Zg7d261y5988kmVlpZe1EBnc7vd6tmzp5577jlJ0vXXX69du3YpKyvrnE+rvRBpaWmaPHmy57nL5SI6AACwqFZXOGpy//331+nnqERHR6tLly5eyzp37qyDBw9KkqKioiRJhYWFXtsUFhZ61lUnODhYkZGRXg8AAGBPnQZHbm5unb4y5Oabb9aePXu8lu3du1dt27aV9P0NpFFRUcrJyfGsd7lc2rJlixISEupsDgAAcHFq9SuVu+66y+u5MUb5+fnavn27pk2bVieDSdKkSZOUmJio5557Tvfcc4+2bt2ql19+WS+//LIkyc/PTxMnTtQzzzyj9u3be14WGxMTo8GDB9fZHAAA4OLUKjgcDofXc39/f3Xs2FFPP/20br/99joZTJJuvPFGvfHGG0pLS9PTTz+t+Ph4ZWRkaOjQoZ5tpk6dqrKyMo0ePVrFxcW65ZZblJ2dzXtwAABQj/gZY4yvh/A1l8slh8OhkpKSRn8/R9zjb/l6BNShAzMH+noE1CHOz8bncjhHf+rfoRf1xl95eXnavXu3JOmaa67R9ddffzGHAwAAjVStgqOoqEhDhgzRhg0b1KxZM0lScXGxevfureXLl6t169Z1OSMAAGjgavUqlfHjx+vEiRP67LPPdOzYMR07dky7du2Sy+XShAkT6npGAADQwNXqCkd2drbWrl2rzp07e5Z16dJFmZmZdXrTKAAAaBxqdYXD7XYrMDDwnOWBgYFyu90XPRQAAGhcahUcffr00aOPPqojR454lh0+fFiTJk1S375962w4AADQONQqOF588UW5XC7FxcWpXbt2ateuneLj4+VyuTR//vy6nhEAADRwtbqHIzY2Vjt27NDatWv1xRdfSPr+M07O/ph4AACAMy7oCse6devUpUsXuVwu+fn5qV+/fho/frzGjx+vG2+8Uddcc43ee+89W7MCAIAG6oKCIyMjQ6NGjar2ncQcDofGjBmjOXPm1NlwAACgcbig4Pjkk0/Uv3//GtfffvvtysvLu+ihAABA43JBwVFYWFjty2HPCAgI0DfffHPRQwEAgMblgoLjyiuv1K5du2pcv3PnTkVHR1/0UAAAoHG5oOC44447NG3aNJWXl5+z7rvvvtOMGTP0q1/9qs6GAwAAjcMFvSz2iSee0N/+9jd16NBBqamp6tixoyTpiy++UGZmpqqqqvSHP/zByqAAAKDhuqDgcDqd2rx5s8aOHau0tDQZYyRJfn5+Sk5OVmZmppxOp5VBAQBAw3XBb/zVtm1brV69WsePH9eXX34pY4zat2+v5s2b25gPAAA0ArV6p1FJat68uW688ca6nAUAADRStfosFQAAgAtBcAAAAOsIDgAAYB3BAQAArCM4AACAdQQHAACwjuAAAADWERwAAMA6ggMAAFhHcAAAAOsIDgAAYB3BAQAArCM4AACAdQQHAACwjuAAAADWERwAAMA6ggMAAFhHcAAAAOsIDgAAYB3BAQAArCM4AACAdQQHAACwjuAAAADWNajgmDlzpvz8/DRx4kTPsvLyco0bN04tW7ZUeHi4UlJSVFhY6LshAQDAORpMcGzbtk0vvfSSrrvuOq/lkyZN0ptvvqnXXntNGzdu1JEjR3TXXXf5aEoAAFCdBhEcpaWlGjp0qP70pz+pefPmnuUlJSX685//rDlz5qhPnz664YYbtHDhQm3evFkffvihDycGAABnaxDBMW7cOA0cOFBJSUley/Py8lRZWem1vFOnTmrTpo1yc3NrPF5FRYVcLpfXAwAA2BPg6wF+zPLly7Vjxw5t27btnHUFBQUKCgpSs2bNvJY7nU4VFBTUeMz09HQ99dRTdT0qAAC
oQb2+wnHo0CE9+uijWrp0qUJCQursuGlpaSopKfE8Dh06VGfHBgAA56rXwZGXl6eioiL16NFDAQEBCggI0MaNGzVv3jwFBATI6XTq1KlTKi4u9tqvsLBQUVFRNR43ODhYkZGRXg8AAGBPvf6VSt++ffXpp596LRsxYoQ6deqk3//+94qNjVVgYKBycnKUkpIiSdqzZ48OHjyohIQEX4wMAACqUa+DIyIiQtdee63XsqZNm6ply5ae5SNHjtTkyZPVokULRUZGavz48UpISNBNN93ki5EBAEA16nVw/BRz586Vv7+/UlJSVFFRoeTkZP3xj3/09VgAAOAsDS44NmzY4PU8JCREmZmZyszM9M1AAADgR9Xrm0YBAEDjQHAAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYV6+DIz09XTfeeKMiIiJ0xRVXaPDgwdqzZ4/XNuXl5Ro3bpxatmyp8PBwpaSkqLCw0EcTAwCA6tTr4Ni4caPGjRunDz/8UGvWrFFlZaVuv/12lZWVebaZNGmS3nzzTb322mvauHGjjhw5orvuusuHUwMAgB8K8PUA55Odne31fNGiRbriiiuUl5enW2+9VSUlJfrzn/+sZcuWqU+fPpKkhQsXqnPnzvrwww910003+WJsAADwA/X6CscPlZSUSJJatGghScrLy1NlZaWSkpI823Tq1Elt2rRRbm5ujcepqKiQy+XyegAAAHsaTHC43W5NnDhRN998s6699lpJUkFBgYKCgtSsWTOvbZ1OpwoKCmo8Vnp6uhwOh+cRGxtrc3QAAC57DSY4xo0bp127dmn58uUXfay0tDSVlJR4HocOHaqDCQEAQE3q9T0cZ6SmpmrVqlXatGmTfvazn3mWR0VF6dSpUyouLva6ylFYWKioqKgajxccHKzg4GCbIwMAgLPU6yscxhilpqbqjTfe0Lp16xQfH++1/oYbblBgYKBycnI8y/bs2aODBw8qISHhUo8LAABqUK+vcIwbN07Lli3T3//+d0VERHjuy3A4HAoNDZXD4dDIkSM1efJktWjRQpGRkRo/frwSEhJ4hQoAAPVIvQ6OBQsWSJJ++ctfei1fuHChhg8fLkmaO3eu/P39lZKSooqKCiUnJ+uPf/zjJZ4UAACcT70ODmPMj24TEhKizMxMZWZmXoKJAABAbdTrezgAAEDjQHAAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABY12iCIzMzU3FxcQoJCVGvXr20detWX48EAAD+X6MIjldeeUWTJ0/WjBkztGPHDnXr1k3JyckqKiry9WgAAECNJDjmzJmjUaNGacSIEerSpYuysrIUFhamv/zlL74eDQAASArw9QAX69SpU8rLy1NaWppnmb+/v5KSkpSbm1vtPhUVFaqoqPA8LykpkSS5XC67w9YD7oqTvh4Bdehy+DN7OeH8bHwuh3P0zPdojDnvdg0+OI4ePaqqqio5nU6
v5U6nU1988UW1+6Snp+upp546Z3lsbKyVGQFbHBm+ngDA+VxO5+iJEyfkcDhqXN/gg6M20tLSNHnyZM9zt9utY8eOqWXLlvLz8/PhZKgLLpdLsbGxOnTokCIjI309DoCzcH42PsYYnThxQjExMefdrsEHR6tWrdSkSRMVFhZ6LS8sLFRUVFS1+wQHBys4ONhrWbNmzWyNCB+JjIzkf2hAPcX52bic78rGGQ3+ptGgoCDdcMMNysnJ8Sxzu93KyclRQkKCDycDAABnNPgrHJI0efJkDRs2TD179tTPf/5zZWRkqKysTCNGjPD1aAAAQI0kOO6991598803mj59ugoKCtS9e3dlZ2efcyMpLg/BwcGaMWPGOb82A+B7nJ+XLz/zY69jAQAAuEgN/h4OAABQ/xEcAADAOoIDAABYR3DgshcXF6eMjAxfjwE0ahs2bJCfn5+Ki4vPux3nY+NFcMCq4cOHy8/PTzNnzvRavmLFikv+rq6LFi2q9g3etm3bptGjR1/SWYD66sw56+fnp6CgIF199dV6+umndfr06Ys6bmJiovLz8z1vEMX5ePkhOGBdSEiIZs2apePHj/t6lGq1bt1aYWFhvh4DqDf69++v/Px87du3T4899piefPJJ/cd//MdFHTMoKEhRUVE/+g8NzsfGi+CAdUlJSYqKilJ6enqN27z//vv6xS9+odDQUMXGxmrChAkqKyvzrM/Pz9fAgQMVGhqq+Ph4LVu27JxLr3PmzFHXrl3VtGlTxcbG6pFHHlFpaamk7y/njhgxQiUlJZ5/vT355JOSvC/h3nfffbr33nu9ZqusrFSrVq20ZMkSSd+/k216erri4+MVGhqqbt266fXXX6+DnxRQPwQHBysqKkpt27bV2LFjlZSUpJUrV+r48eN68MEH1bx5c4WFhWnAgAHat2+fZ7+vv/5ad955p5o3b66mTZvqmmuu0erVqyV5/0qF8/HyRHDAuiZNmui5557T/Pnz9c9//vOc9V999ZX69++vlJQU7dy5U6+88oref/99paamerZ58MEHdeTIEW3YsEH/+7//q5dffllFRUVex/H399e8efP02WefafHixVq3bp2mTp0q6fvLuRkZGYqMjFR+fr7y8/M1ZcqUc2YZOnSo3nzzTU+oSNI777yjkydP6te//rWk7z9teMmSJcrKytJnn32mSZMm6f7779fGjRvr5OcF1DehoaE6deqUhg8fru3bt2vlypXKzc2VMUZ33HGHKisrJUnjxo1TRUWFNm3apE8//VSzZs1SeHj4OcfjfLxMGcCiYcOGmUGDBhljjLnpppvMb3/7W2OMMW+88YY588dv5MiRZvTo0V77vffee8bf39989913Zvfu3UaS2bZtm2f9vn37jCQzd+7cGr/2a6+9Zlq2bOl5vnDhQuNwOM7Zrm3btp7jVFZWmlatWpklS5Z41v/mN78x9957rzHGmPLychMWFmY2b97sdYyRI0ea3/zmN+f/YQANwNnnrNvtNmvWrDHBwcFm8ODBRpL54IMPPNsePXrUhIaGmldffdUYY0zXrl3Nk08+We1x169fbySZ48ePG2M4Hy9HjeKtzdEwzJo1S3369DnnXzKffPKJdu7cqaVLl3qWGWPkdru1f/9+7d27VwEBAerRo4dn/dVXX63mzZt7HWft2rVKT0/XF198IZfLpdOnT6u8vFwnT578yb8TDggI0D333KOlS5fqgQceUFlZmf7+979r+fLlkqQvv/xSJ0+eVL9+/bz2O3XqlK6//voL+nkA9dWqVasUHh6uyspKud1u3Xfffbrrrru0atUq9erVy7Ndy5Yt1bFjR+3evVuSNGHCBI0dO1bvvvuukpKSlJKSouuuu67Wc3A+Ni4EBy6ZW2+9VcnJyUpLS9Pw4cM9y0tLSzVmzBhNmDDhnH3atGmjvXv3/uixDxw4oF/96lcaO3asnn32WbVo0ULvv/++Ro4cqVOnTl3QTWhDhw7VbbfdpqKiIq1Zs0a
hoaHq37+/Z1ZJeuutt3TllVd67cdnQ6Cx6N27txYsWKCgoCDFxMQoICBAK1eu/NH9HnroISUnJ+utt97Su+++q/T0dL3wwgsaP358rWfhfGw8CA5cUjNnzlT37t3VsWNHz7IePXro888/19VXX13tPh07dtTp06f10Ucf6YYbbpD0/b9szn7VS15entxut1544QX5+39/a9Krr77qdZygoCBVVVX96IyJiYmKjY3VK6+8orffflt33323AgMDJUldunRRcHCwDh48qNtuu+3CvnmggWjatOk552Pnzp11+vRpbdmyRYmJiZKkb7/9Vnv27FGXLl0828XGxurhhx/Www8/rLS0NP3pT3+qNjg4Hy8/BAcuqa5du2ro0KGaN2+eZ9nvf/973XTTTUpNTdVDDz2kpk2b6vPPP9eaNWv04osvqlOnTkpKStLo0aO1YMECBQYG6rHHHlNoaKjnJXZXX321KisrNX/+fN1555364IMPlJWV5fW14+LiVFpaqpycHHXr1k1hYWE1Xvm47777lJWVpb1792r9+vWe5REREZoyZYomTZokt9utW265RSUlJfrggw8UGRmpYcOGWfipAb7Xvn17DRo0SKNGjdJLL72kiIgIPf7447ryyis1aNAgSdLEiRM1YMAAdejQQcePH9f69evVuXPnao/H+XgZ8vVNJGjczr4B7Yz9+/eboKAgc/Yfv61bt5p+/fqZ8PBw07RpU3PdddeZZ5991rP+yJEjZsCAASY4ONi0bdvWLFu2zFxxxRUmKyvLs82cOXNMdHS0CQ0NNcnJyWbJkiVeN6kZY8zDDz9sWrZsaSSZGTNmGGO8b1I74/PPPzeSTNu2bY3b7fZa53a7TUZGhunYsaMJDAw0rVu3NsnJyWbjxo0X98MC6oHqztkzjh07Zh544AHjcDg859nevXs961NTU027du1McHCwad26tXnggQfM0aNHjTHn3jRqDOfj5YaPp0eD9M9//lOxsbFau3at+vbt6+txAAA/guBAg7Bu3TqVlpaqa9euys/P19SpU3X48GHt3bvX8/tcAED9xT0caBAqKyv1b//2b/rHP/6hiIgIJSYmaunSpcQGADQQXOEAAADW8dbmAADAOoIDAABYR3AAAADrCA4AAGAdwQGg3tmwYYP8/PxUXFzs61EA1BGCA0CNvvnmG40dO1Zt2rRRcHCwoqKilJycrA8++KDOvsYvf/lLTZw40WtZYmKi8vPz5XA46uzr1Nbw4cM1ePBgX48BNHi8DweAGqWkpOjUqVNavHixrrrqKhUWFionJ0fffvut1a8bFBSkqKgoq18DwCXmy/dVB1B/HT9+3EgyGzZsOO82I0eONK1atTIRERGmd+/e5uOPP/asnzFjhunWrZtZsmSJadu2rYmMjDT33nuvcblcxpjvP7dDktdj//7953zuxsKFC43D4TBvvvmm6dChgwkNDTUpKSmmrKzMLFq0yLRt29Y0a9bMjB8/3pw+fdrz9cvLy81jjz1mYmJiTFhYmPn5z39u1q9f71l/5rjZ2dmmU6dOpmnTpiY5OdkcOXLEM/8P5zt7fwA/Hb9SAVCt8PBwhYeHa8WKFaqoqKh2m7vvvltFRUV6++23lZeXpx49eqhv3746duyYZ5uvvvpKK1as0KpVq7Rq1Spt3LhRM2fOlCT953/+pxISEjRq1Cjl5+crPz9fsbGx1X6tkydPat68eVq+fLmys7O1YcMG/frXv9bq1au1evVq/fWvf9VLL72k119/3bNPamqqcnNztXz5cu3cuVN33323+vfvr3379nkd9/nnn9df//pXbdq0SQcPHtSUKVMkSVOmTNE999yj/v37e+Y789HsAC6Qr4sHQP31+uuvm+bNm5uQkBCTmJho0tLSzCeffGKMMea9994zkZGRpry83Gufdu3amZdeeskY8/0VgrCwMM8VDWOM+d3vfmd69erleX7bbbeZRx991OsY1V3hkGS+/PJLzzZjxowxYWFh5sS
JE55lycnJZsyYMcYYY77++mvTpEkTc/jwYa9j9+3b16SlpdV43MzMTON0Oj3Pz/fpqQB+Ou7hAFCjlJQUDRw4UO+9954+/PBDvf3225o9e7b+67/+S2VlZSotLVXLli299vnuu+/01VdfeZ7HxcUpIiLC8zw6OlpFRUUXPEtYWJjatWvnee50OhUXF6fw8HCvZWeO/emnn6qqqkodOnTwOk5FRYXXzD88bm3nA3B+BAeA8woJCVG/fv3Ur18/TZs2TQ899JBmzJihRx55RNHR0dqwYcM5+zRr1szz3z/8gD0/Pz+53e4LnqO645zv2KWlpWrSpIny8vLUpEkTr+3OjpTqjmH4iCmgzhEcAC5Ily5dtGLFCvXo0UMFBQUKCAhQXFxcrY8XFBSkqqqquhvw/11//fWqqqpSUVGRfvGLX9T6OLbmAy433DQKoFrffvut+vTpo//+7//Wzp07tX//fr322muaPXu2Bg0apKSkJCUkJGjw4MF69913deDAAW3evFl/+MMftH379p/8deLi4rRlyxYdOHBAR48erdXVj+p06NBBQ4cO1YMPPqi//e1v2r9/v7Zu3ar09HS99dZbFzTfzp07tWfPHh09elSVlZV1Mh9wuSE4AFQrPDxcvXr10ty5c3Xrrbfq2muv1bRp0zRq1Ci9+OKL8vPz0+rVq3XrrbdqxIgR6tChg4YMGaKvv/5aTqfzJ3+dKVOmqEmTJurSpYtat26tgwcP1tn3sHDhQj344IN67LHH1LFjRw0ePFjbtm1TmzZtfvIxRo0apY4dO6pnz55q3bp1nb7pGXA58TP8shIAAFjGFQ4AAGAdwQEAAKwjOAAAgHUEBwAAsI7gAAAA1hEcAADAOoIDAABYR3AAAADrCA4AAGAdwQEAAKwjOAAAgHUEBwAAsO7/AEFyyRxAFnsmAAAAAElFTkSuQmCC", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "_, ax = plt.subplots(figsize=(6, 4))\n", "\n", "# value_counts() orders by frequency, not by label: sort by label (0, 1)\n", "# so the bars line up with the hard-coded tick labels below\n", "dataset[\"sentiment\"].value_counts().sort_index().plot(kind=\"bar\", ax=ax)\n", "ax.set_xticklabels([\"Negative\", \"Positive\"], rotation=0)\n", "ax.set_xlabel(\"Sentiment\")\n", "ax.set_ylabel(\"Count\")\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Word cloud (before tokenization)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b4a25bdbb50c49b585c563f23cd13cef", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/209 [00:00" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Count how often each lower-cased, whitespace-separated word occurs in the raw text\n", "word_freq = Counter()\n", "dataset[\"text\"].str.lower().str.split().progress_apply(word_freq.update)\n", "\n", "# Keep only the 100 most frequent words\n", "common_words = word_freq.most_common(100)\n", "\n", "# Create a word cloud of the most common words\n", "wrd_cloud = WordCloud(width=800, height=400, random_state=SEED).generate_from_frequencies(dict(common_words))\n", "\n", "# Display the word cloud\n", "plt.figure(figsize=(20, 20))\n", "plt.imshow(wrd_cloud, interpolation=\"bilinear\")\n", "plt.axis(\"off\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Word cloud (after tokenization)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9a560aa6aa15497690e4b28504a6ae44", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/209 [00:00" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Count how often each token occurs across the tokenized documents\n", "token_freq = Counter()\n", "dataset[\"tokens\"].str.split().progress_apply(token_freq.update)\n", "\n", "# Now get the most common tokens\n", "common_tokens = 
token_freq.most_common(100)\n", "\n", "# Create a word cloud of the most common tokens\n", "tkn_cloud = WordCloud(width=800, height=400, random_state=SEED).generate_from_frequencies(dict(common_tokens))\n", "\n", "# Display the word cloud\n", "plt.figure(figsize=(20, 20))\n", "plt.imshow(tkn_cloud, interpolation=\"bilinear\")\n", "plt.axis(\"off\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Token association" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5db39c26bec14882a8412f5ff70e9906", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/96 [00:00" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "_, ax = plt.subplots(2, 1, figsize=(20, 20))\n", "\n", "# One panel per class: label 0 is titled \"Negative\", label 1 \"Positive\"\n", "for i, sentiment in enumerate([\"Negative\", \"Positive\"]):\n", "    # Count the tokens of the rows carrying this label\n", "    freq = Counter()\n", "    dataset.loc[dataset[\"sentiment\"] == i, \"tokens\"].str.split().progress_apply(freq.update)\n", "    most_common = freq.most_common(100)\n", "\n", "    # Render the 100 most frequent tokens into this label's panel\n", "    cloud = WordCloud(width=800, height=400, random_state=SEED).generate_from_frequencies(dict(most_common))\n", "    panel = ax[i]\n", "    panel.imshow(cloud, interpolation=\"bilinear\")\n", "    panel.axis(\"off\")\n", "    panel.set_title(sentiment)\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Token frequency" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAhMAAAFzCAYAAACEv4vjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA8ZklEQVR4nO3deVwV9f4/8Ndh3w+CyhKrubC4KynidSEUzDUply+ZltXVFAXUlFuKkHsqbqhpJNp1q1ua2g2vIqAhKuLFJREVUUm2bgoIBgJnfn/4Y/IIqDDAgL6ej8c8Yj4zZ+Z9PqLn1Wc+Z0YhCIIAIiIiojrSkLsAIiIiat4YJoiIiEgShgkiIiKShGGCiIiIJGGYICIiIkkYJoiIiEgShgkiIiKShGGCiIiIJNGSu4CGplKpkJWVBWNjYygUCrnLISIiajYEQcD9+/dhbW0NDY2axx9e+DCRlZUFW1tbucsgIiJqtjIzM2FjY1Pj9hc+TBgbGwN41BEmJiYyV0NERNR8FBYWwtbWVvwsrckLHyYqL22YmJgwTBAREdXBs6YJcAImERERScIwQURERJIwTBAREZEkL/ycCSIiqQRBQHl5OSoqKuQuhaheaWpqQktLS/KtExgmiIie4uHDh8jOzsaDBw/kLoWoQRgYGMDKygo6Ojp1PgbDBBFRDVQqFTIyMqCpqQlra2vo6Ojw5nf0whAEAQ8fPsTvv/+OjIwMtGvX7qk3pnoahgkioho8fPgQKpUKtra2MDAwkLsconqnr68PbW1t3Lp1Cw8fPoSenl6djsMJmEREz1DX/1sjag7q4/ebf0OIiIhIEoYJIiIikoRhgoiIarRw4UJYWFhAoVBg//79cpfzXG7evAmFQoGUlJQGO0dcXBwUCgXy8/Mb7BwA4ODggDVr1jToOeoDJ2BS7cQurb59YHDj1kEks/AjVxvtXIGD2tdq/0mTJmH79u3iupmZGdzc3LBixQp07tz5uY+TmpqK0NBQ7Nu3D71790aLFi1qVYcUz/rWTEhICBYuXNg4xVSjT58+yM7OhlKprJfjRUVFISAgoEo4SUpKgqGhYb2coyFxZIKI6AXk4+OD7OxsZGdnIyYmBlpaWhg2bFitjpGeng4AGDlyJCwtLaGrq1unWsrKymr9msras7OzsWbNGpiYmKi1zZ49u0611BcdHR1YWlo2+FeFW7Vq1Sy+ScQwQUT0AtLV1YWlpSUsLS3RtWtXzJs3D5mZmfj999/FfTIzMzFmzBiYmprCzMwMI0eOxM2bNwE8urwxfPhwAI9m+1d+aKpUKoSFhcHGxga6urro2rUroqOjxWNWXmLYu3cv+vfvDz09PezcuRMA8NVXX8HZ2Rl6enpwcnLCxo0ba6y/snZLS0solUooFApxvXXr1li9enWNNTypoqIC77//PpycnHD79m0AwI8//oju3btDT08Pbdq0QWhoKMrLy8XXKBQKfPXVV3jzzTdhYGCAdu3a4cCBA+L2Jy9zDBgwAAqFospS2Z+rV69Gp06dYGhoCFtbW3z88ccoKioSj/Xee++hoKBAfF3lqMuTlzlu376NkSNHwsjICCYmJhgzZgxyc3PF7QsXLkTXrl3xzTffwMHBAUqlEuPGjcP9+/dr7J/6wDBBRPSCKyoqwj//+U+0bdsW5ubmAB6NFnh7e8PY2BgnTpxAQkICjIyM4OPjg4cPH2L27NnYtm0bgL9GCQBg7dq1WLVqFVauXIkLFy7A29sbI0aMwLVr19TOOW/ePMycOROpqanw9vbGzp07sWDBAixevBipqalYsmQJ5s+fr3Y55nk9bw0AUFpairfffhspKSk4ceIE7OzscOLECbz77ruYOXMmLl++jC+//BJRUVFYvHix2mtDQ0MxZswYXLhwAW+88Qb8/Pxw9+7damv64Ycf1EZORo8ejQ4dOsDCwgLAo0C2bt06/Prrr9i+fTuOHTuGTz75BMCjSyZPjr5UN/KiUqkwcuRI3L17F/Hx8Thy5Ah
u3LiBsWPHqu2Xnp6O/fv349ChQzh06BDi4+OxbNmyWvdzbXDOBBHRC+jQoUMwMjICABQXF8PKygqHDh0S7ymwd+9eqFQqfPXVV+Kow7Zt22Bqaoq4uDgMHjwYpqamAB6NElRauXIl5s6di3HjxgEAli9fjtjYWKxZswYRERHifgEBARg9erS4HhISglWrVoltjo6O4gf5xIkTa/XenreGoqIiDB06FKWlpYiNjRXnN4SGhmLevHniedu0aYPPP/8cn3zyCUJCQsTXT5o0CePHjwcALFmyBOvWrcOZM2fg4+NTpSYzMzPx5/DwcBw7dgynT5+Gvr6+2B+VHBwcsGjRIkyZMgUbN26Ejo6O2uhLTWJiYnDx4kVkZGTA1tYWALBjxw64uroiKSkJbm5uAB6FjqioKBgbGwMAJkyYgJiYmCphqT4xTBARvYAGDhyITZs2AQDu3buHjRs3YsiQIThz5gzs7e1x/vx5XL9+XfzAqVRSUiLOlXhSYWEhsrKy4OHhodbu4eGB8+fPq7X17NlT/Lm4uBjp6emYPHkyPvzwQ7G9vLy81hMYa1PD+PHjYWNjg2PHjokf6gBw/vx5JCQkqH24VlRUoKSkBA8ePBDnKDw+WdXQ0BAmJibIy8t7an0///wz5s2bh4MHD6J9+78mzh49ehRLly7FlStXUFhYiPLy8irne5bU1FTY2tqKQQIAXFxcYGpqitTUVDFMODg4qP25WllZPbNuqRgmiIheQIaGhmjbtq24/tVXX0GpVGLr1q1YtGgRioqK0KNHD3E+w+NatWpVL+evVDk3YOvWrejVq5fafpqampLPVZM33ngD//znP5GYmAhPT0+1ekJDQ9VGTio9fjtpbW1ttW0KhQIqlarG812+fBnjxo3DsmXLMHjwYLH95s2bGDZsGKZOnYrFixfDzMwMv/zyCyZPnoyHDx/W+wTL2tZdHxgmiIheAgqFAhoaGvjzzz8BAN27d8fevXvRunVrmJiYPNcxTExMYG1tjYSEBPTv319sT0hIwGuvvVbj6ywsLGBtbY0bN27Az89P0vuoTQ1Tp05Fx44dMWLECPz000/i/t27d0daWppa2JLqf//7H4YPHw5fX18EBgaqbUtOToZKpcKqVavEy0zffvut2j46OjrPfMS9s7MzMjMzkZmZKY5OXL58Gfn5+XBxcam391IXDBNERC+g0tJS5OTkAHh0mWPDhg0oKioSv6Hh5+eHL774AiNHjhS/nXHr1i388MMP+OSTT2BjY1PtcefMmYOQkBC8+uqr6Nq1K7Zt24aUlJRqRzgeFxoaihkzZkCpVMLHxwelpaU4e/Ys7t27h6CgoFq9t9rU4O/vj4qKCgwbNgw///wz+vbtiwULFmDYsGGws7PDW2+9BQ0NDZw/fx6XLl3CokWLalVLJV9fXxgYGGDhwoVivwOPRnnatm2LsrIyrF+/HsOHD0dCQgI2b96s9noHBwcUFRUhJiYGXbp0gYGBQZURCy8vL3Tq1Al+fn5Ys2YNysvL8fHHH6N///5ql5XkwDBBRFQHtb2RVGOLjo6GlZUVAMDY2BhOTk747rvvMGDAAACAgYEBjh8/jrlz52L06NG4f/8+XnnlFbz++utPHamYMWMGCgoKMGvWLOTl5cHFxQUHDhxAu3btnlrPBx98AAMDA3zxxReYM2cODA0N0alTJ7WJic+rtjUEBARApVLhjTfeQHR0NLy9vXHo0CGEhYVh+fLl0NbWhpOTEz744INa11Lp+PHjAAB7e3u19oyMDHTp0gWrV6/G8uXLERwcjH79+mHp0qV49913xf369OmDKVOmYOzYsfjjjz+qvSmXQqHAjz/+CH9/f/Tr1w8aGhrw8fHB+vXr61x3fVEIgiDIXURDKiwshFKpREFBwXMP5dFT8A6Y9BIpKSlBRkYGHB0d6/xoZqKm7mm/58/7Gcr7TBAREZEkDBNEREQkCcMEERERScIwQURERJIwTBAREZEkDBNEREQkCcMEERE
RScIwQURERJLIHibu3LmDd955B+bm5tDX10enTp1w9uxZcbsgCFiwYAGsrKygr68PLy+vap9ZT0REz08QBHz00UcwMzODQqFASkqK3CXVKC4uDgqFAvn5+Q12jqioKPGR6w1JoVBg//79DX6exibr7bTv3bsHDw8PDBw4ED///DNatWqFa9euoUWLFuI+K1aswLp167B9+3Y4Ojpi/vz58Pb2xuXLl3lHOiKST013g20IdbzDbGJiIvr27QsfHx/89NNPatuio6MRFRWFuLg4tGnTBi1btoRCocC+ffswatSoeij6Lzdv3oSjo+NT99m2bRsmTZpUr+etjbFjx+KNN96ot+MtXLgQ+/fvrxLSsrOz1T7jXhSyhonly5fD1tYW27ZtE9se/4UTBAFr1qzBZ599hpEjRwIAduzYAQsLC+zfvx/jxo1r9JqJiJqLyMhI+Pv7IzIyEllZWbC2tha3paenw8rKCn369Kn385aVlak9BtvW1hbZ2dni+sqVKxEdHY2jR4+KbUqlst7rqA19fX3o6+s3+HksLS0b/BxykPUyx4EDB9CzZ0+8/fbbaN26Nbp164atW7eK2zMyMpCTkwMvLy+xTalUolevXkhMTKz2mKWlpSgsLFRbiIheNkVFRdi7dy+mTp2KoUOHIioqStw2adIk+Pv74/bt21AoFHBwcICDgwMA4M033xTbKv3444/o3r079PT00KZNG4SGhqK8vFzcrlAosGnTJowYMQKGhoZYvHixWi2ampqwtLQUFyMjI2hpaYnrLVq0wNy5c9G6dWvo6emhb9++SEpKqvG9PXjwAEOGDIGHh4d46eOrr76Cs7Mz9PT04OTkhI0bN4r737x5EwqFAj/88AMGDhwIAwMDdOnSRe1z5MnLHA4ODlAoFFWWSnPnzkX79u1hYGCANm3aYP78+SgrKxOPFRoaivPnz4uvq+z/Jy9zXLx4EZ6entDX14e5uTk++ugjFBUVqf1ZjRo1CitXroSVlRXMzc0xbdo08VxNhaxh4saNG9i0aRPatWuHw4cPY+rUqZgxYwa2b98OAOJjXC0sLNReZ2FhofaI18ctXboUSqVSXCqf+U5E9DL59ttv4eTkhA4dOuCdd97B119/jcrnOq5du1Z87Hh2djaSkpLED+9t27aJbQBw4sQJvPvuu5g5cyYuX76ML7/8ElFRUVUCw8KFC/Hmm2/i4sWLeP/992tV6yeffILvv/8e27dvx7lz59C2bVt4e3vj7t27VfbNz8/HoEGDoFKpcOTIEZiammLnzp1YsGABFi9ejNTUVCxZsgTz588XP0sqffrpp5g9ezZSUlLQvn17jB8/Xi0UPS4pKQnZ2dnIzs7Gb7/9ht69e+Nvf/ubuN3Y2BhRUVG4fPky1q5di61btyI8PBzAo0sms2bNgqurq3iMsWPHVjlHcXExvL290aJFCyQlJeG7777D0aNHMX36dLX9YmNjkZ6ejtjYWGzfvh1RUVFq4bApkDVMqFQqdO/eHUuWLEG3bt3w0Ucf4cMPP6zynPfaCA4ORkFBgbhkZmbWY8VERM1DZGQk3nnnHQCAj48PCgoKEB8fD+DRCK+xsbE4YtCqVSu0atUKAGBqaiq2AUBoaCjmzZuHiRMnok2bNhg0aBA+//xzfPnll2rn+7//+z+89957aNOmDezs7J67zuLiYmzatAlffPEFhgwZAhcXF2zduhX6+vqIjIxU2zcnJwf9+/eHlZUVDh48CAMDAwBASEgIVq1ahdGjR8PR0RGjR49GYGBglRpnz56NoUOHon379ggNDcWtW7dw/fr1autq1aqVOHKyYsUKZGdn4/vvvxe3f/bZZ+jTpw8cHBwwfPhwzJ49G99++y2AR5dMnhx9qe4Syq5du1BSUoIdO3agY8eO8PT0xIYNG/DNN98gNzdX3K9FixbYsGEDnJycMGzYMAwdOhQxMTHP3ceNQdY5E1ZWVnBxcVFrc3Z2Fv/AKq8t5ebmwsrKStwnNzcXXbt
2rfaYurq60NXVbZiCiYiagbS0NJw5cwb79u0DAGhpaWHs2LGIjIzEgAEDanWs8+fPIyEhQW0koqKiAiUlJXjw4IH4gd6zZ8861Zqeno6ysjJ4eHiIbdra2njttdeQmpqqtu+gQYPw2muvYe/evdDU1ATwKIykp6dj8uTJ+PDDD8V9y8vLq8zD6Ny5s/hz5WdKXl4enJycaqxvy5YtiIyMxMmTJ8WABQB79+7FunXrkJ6ejqKiIpSXlz/1Ed3VSU1NRZcuXWBoaCi2eXh4QKVSIS0tTRyVd3V1Fd9vZe0XL16s1bkamqxhwsPDA2lpaWptV69ehb29PYBHkzEtLS0RExMjhofCwkKcPn0aU6dObexyiYiahcjISJSXl6tNuBQEAbq6utiwYUOtJjsWFRUhNDQUo0ePrrLt8W/UPf6B2FCGDh2K77//HpcvX0anTp3E+gBg69at6NWrl9r+j38AA1CbFFo5/0GlUtV4vtjYWPj7+2P37t1qQSQxMRF+fn4IDQ2Ft7c3lEol9uzZg1WrVkl7gzV4vO7K2p9WtxxkDROBgYHo06cPlixZgjFjxuDMmTPYsmULtmzZAuBRhwUEBGDRokVo166d+NVQa2vrev/qEhHRi6C8vBw7duzAqlWrMHjwYLVto0aNwu7duzFlypRqX6utrY2Kigq1tu7duyMtLQ1t27ZtkHpfffVV6OjoICEhQfwfybKyMiQlJSEgIEBt32XLlsHIyAivv/464uLi4OLiAgsLC1hbW+PGjRvw8/Ort7quX7+Ot956C//4xz+qBKmTJ0/C3t4en376qdh269YttX10dHSq9OWTnJ2dERUVheLiYjGMJSQkQENDAx06dKind9I4ZA0Tbm5u2LdvH4KDgxEWFgZHR0esWbNG7Rfik08+QXFxMT766CPk5+ejb9++iI6O5j0miIiqcejQIdy7dw+TJ0+uMgLh6+uLyMjIGsOEg4MDYmJi4OHhAV1dXbRo0QILFizAsGHDYGdnh7feegsaGho4f/48Ll26hEWLFkmu19DQEFOnTsWcOXNgZmYGOzs7rFixAg8ePMDkyZOr7L9y5UpUVFTA09MTcXFxcHJyQmhoKGbMmAGlUgkfHx+Ulpbi7NmzuHfvHoKCgmpd059//onhw4eLc/ken/BvaWmJdu3a4fbt29izZw/c3Nzw008/iZeUKjk4OCAjIwMpKSmwsbGBsbFxlUvwfn5+CAkJwcSJE7Fw4UL8/vvv8Pf3x4QJE6p88aCpk/0OmMOGDcPFixdRUlKC1NRUtWtewKPRibCwMOTk5KCkpARHjx5F+/btZaqWiKhpi4yMhJeXV7WXMnx9fXH27FlcuHCh2teuWrUKR44cga2tLbp16wYA8Pb2xqFDh/Cf//wHbm5u6N27N8LDw8VRhPqwbNky+Pr6YsKECejevTuuX7+Ow4cP13hzp/DwcIwZMwaenp64evUqPvjgA3z11VfYtm0bOnXqhP79+yMqKuqZN8qqSW5uLq5cuYKYmBhYW1vDyspKXABgxIgRCAwMxPTp09G1a1ecPHkS8+fPVzuGr68vfHx8MHDgQLRq1Qq7d++uch4DAwMcPnwYd+/ehZubG9566y28/vrr2LBhQ53qlpNCqPyu0AuqsLAQSqUSBQUFtZ4cQ9Wo6a5/dbxDH1FTVlJSgoyMDDg6OnI0lF5YT/s9f97PUNlHJoiIiKh5k3XOBFGtVTcywlERIiJZcWSCiIiIJGGYICIiIkkYJoiIiEgShgkiomd4wb/0Ri+5+vj9ZpggIqpB5W2MHzx4IHMlRA2n8vf7ydt21wa/zUFEVANNTU2YmpoiLy8PwKObDFU+04GouRMEAQ8ePEBeXh5MTU2rPMukNhgmiIieovLpxZWBguhFU/nYeSkYJoiInkKhUMDKygqtW7dGWVmZ3OUQ1SttbW1JIxKVGCaIiJ6DpqZmvfyjS/Qi4gRMIiIikoRhgoiIiCRhmCAiIiJJGCaIiIhIEoYJIiIikoR
hgoiIiCRhmCAiIiJJGCaIiIhIEoYJIiIikoRhgoiIiCRhmCAiIiJJGCaIiIhIEoYJIiIikoRhgoiIiCRhmCAiIiJJGCaIiIhIEoYJIiIikoRhgoiIiCRhmCAiIiJJGCaIiIhIEoYJIiIikoRhgoiIiCSRNUwsXLgQCoVCbXFychK3l5SUYNq0aTA3N4eRkRF8fX2Rm5srY8VERET0JNlHJlxdXZGdnS0uv/zyi7gtMDAQBw8exHfffYf4+HhkZWVh9OjRMlZLRERET9KSvQAtLVhaWlZpLygoQGRkJHbt2gVPT08AwLZt2+Ds7IxTp06hd+/ejV0qERERVUP2kYlr167B2toabdq0gZ+fH27fvg0ASE5ORllZGby8vMR9nZycYGdnh8TExBqPV1paisLCQrWFiIiIGo6sYaJXr16IiopCdHQ0Nm3ahIyMDPztb3/D/fv3kZOTAx0dHZiamqq9xsLCAjk5OTUec+nSpVAqleJia2vbwO+CiIjo5SbrZY4hQ4aIP3fu3Bm9evWCvb09vv32W+jr69fpmMHBwQgKChLXCwsLGSiIiIgakOyXOR5namqK9u3b4/r167C0tMTDhw+Rn5+vtk9ubm61cywq6erqwsTERG0hIiKihtOkwkRRURHS09NhZWWFHj16QFtbGzExMeL2tLQ03L59G+7u7jJWSURERI+T9TLH7NmzMXz4cNjb2yMrKwshISHQ1NTE+PHjoVQqMXnyZAQFBcHMzAwmJibw9/eHu7s7v8lBRETUhMgaJn777TeMHz8ef/zxB1q1aoW+ffvi1KlTaNWqFQAgPDwcGhoa8PX1RWlpKby9vbFx40Y5SyYiIqInyBom9uzZ89Ttenp6iIiIQERERCNVRERERLXVpOZMEBERUfPDMEFERESSMEwQERGRJAwTREREJAnDBBEREUnCMEFERESSMEwQERGRJAwTREREJAnDBBEREUnCMEFERESSMEwQERGRJAwTREREJAnDBBEREUnCMEFERESSMEwQERGRJAwTREREJAnDBBEREUnCMEFERESSMEwQERGRJAwTREREJAnDBBEREUnCMEFERESSMEwQERGRJAwTREREJAnDBBEREUnCMEFERESSMEwQERGRJAwTREREJAnDBBEREUnCMEFERESSMEwQERGRJAwTREREJAnDBBEREUnCMEFERESSaMldQKVly5YhODgYM2fOxJo1awAAJSUlmDVrFvbs2YPS0lJ4e3tj48aNsLCwkLdYIpJF+JGrT90eOKh9I1VCRI9rEiMTSUlJ+PLLL9G5c2e19sDAQBw8eBDfffcd4uPjkZWVhdGjR8tUJREREVVH9jBRVFQEPz8/bN26FS1atBDbCwoKEBkZidWrV8PT0xM9evTAtm3bcPLkSZw6dUrGiomIiOhxsoeJadOmYejQofDy8lJrT05ORllZmVq7k5MT7OzskJiYWOPxSktLUVhYqLYQERFRw5F1zsSePXtw7tw5JCUlVdmWk5MDHR0dmJqaqrVbWFggJyenxmMuXboUoaGh9V0qERER1UC2kYnMzEzMnDkTO3fuhJ6eXr0dNzg4GAUFBeKSmZlZb8cmIiKiqmQLE8nJycjLy0P37t2hpaUFLS0txMfHY926ddDS0oKFhQUePnyI/Px8tdfl5ubC0tKyxuPq6urCxMREbSEiIqKGI9tljtdffx0XL15Ua3vvvffg5OSEuXPnwtbWFtra2oiJiYGvry8AIC0tDbdv34a7u7scJRMREVE1ZAsTxsbG6Nixo1qboaEhzM3NxfbJkycjKCgIZmZmMDExgb+/P9zd3dG7d285SiYiIqJqNJmbVlUnPDwcGhoa8PX1VbtpFRERETUdTSpMxMXFqa3r6ekhIiICERER8hREREREzyT7fSaIiIioeWOYICIiIknqdJmjTZs2SEpKgrm5uVp7fn4+unfvjhs3btRLcURETdnTHjzGh47Ry6ROIxM3b95ERUVFlfbS0lLcuXNHclF
ERETUfNRqZOLAgQPiz4cPH4ZSqRTXKyoqEBMTAwcHh3orjoiIiJq+WoWJUaNGAQAUCgUmTpyotk1bWxsODg5YtWpVvRVHRERETV+twoRKpQIAODo6IikpCS1btmyQooiIiKj5qNMEzIyMjPqug4iIiJqpOt+0KiYmBjExMcjLyxNHLCp9/fXXkgsjIiKi5qFOYSI0NBRhYWHo2bMnrKysoFAo6rsuIiIiaibqFCY2b96MqKgoTJgwob7rISIiomamTveZePjwIfr06VPftRAREVEzVKcw8cEHH2DXrl31XQsRERE1Q3W6zFFSUoItW7bg6NGj6Ny5M7S1tdW2r169ul6KIyIioqavTmHiwoUL6Nq1KwDg0qVLats4GZOIiOjlUqcwERsbW991EBERUTPFR5ATERGRJHUamRg4cOBTL2ccO3aszgURERFR81KnMFE5X6JSWVkZUlJScOnSpSoPACMiIqIXW53CRHh4eLXtCxcuRFFRkaSCiIiIqHmp1zkT77zzDp/LQURE9JKp1zCRmJgIPT29+jwkERERNXF1uswxevRotXVBEJCdnY2zZ89i/vz59VIYERERNQ91ChNKpVJtXUNDAx06dEBYWBgGDx5cL4URERFR81CnMLFt27b6roOIiIiaqTqFiUrJyclITU0FALi6uqJbt271UhRRsxG7tGrbwODGr4OISEZ1ChN5eXkYN24c4uLiYGpqCgDIz8/HwIEDsWfPHrRq1ao+ayQiIqImrE7f5vD398f9+/fx66+/4u7du7h79y4uXbqEwsJCzJgxo75rJCIioiasTiMT0dHROHr0KJydncU2FxcXREREcAImERHRS6ZOYUKlUkFbW7tKu7a2NlQqleSiiKh64Ueu1rgtcFD7RqyEiOgvdbrM4enpiZkzZyIrK0tsu3PnDgIDA/H666/XW3FERETU9NUpTGzYsAGFhYVwcHDAq6++ildffRWOjo4oLCzE+vXr67tGIiIiasLqdJnD1tYW586dw9GjR3HlyhUAgLOzM7y8vOq1OCIiImr6ajUycezYMbi4uKCwsBAKhQKDBg2Cv78//P394ebmBldXV5w4caKhaiUiIqImqFZhYs2aNfjwww9hYmJSZZtSqcTf//53rF69+rmPt2nTJnTu3BkmJiYwMTGBu7s7fv75Z3F7SUkJpk2bBnNzcxgZGcHX1xe5ubm1KZmIiIgaWK3CxPnz5+Hj41Pj9sGDByM5Ofm5j2djY4Nly5YhOTkZZ8+ehaenJ0aOHIlff/0VABAYGIiDBw/iu+++Q3x8PLKysqo8ZIyIiIjkVas5E7m5udV+JVQ8mJYWfv/99+c+3vDhw9XWFy9ejE2bNuHUqVOwsbFBZGQkdu3aBU9PTwCPngni7OyMU6dOoXfv3rUpnYiIiBpIrUYmXnnlFVy6dKnG7RcuXICVlVWdCqmoqMCePXtQXFwMd3d3JCcno6ysTG1Sp5OTE+zs7JCYmFjjcUpLS1FYWKi2EBERUcOpVZh44403MH/+fJSUlFTZ9ueffyIkJATDhg2rVQEXL16EkZERdHV1MWXKFOzbtw8uLi7IycmBjo6O+OyPShYWFsjJyanxeEuXLoVSqRQXW1vbWtVDREREtVOryxyfffYZfvjhB7Rv3x7Tp09Hhw4dAABXrlxBREQEKioq8Omnn9aqgA4dOiAlJQUFBQX417/+hYkTJyI+Pr5Wx3hccHAwgoKCxPXCwkIGCiIiogZUqzBhYWGBkydPYurUqQgODoYgCAAAhUIBb29vREREwMLColYF6OjooG3btgCAHj16ICkpCWvXrsXYsWPx8OFD5Ofnq41O5ObmwtLSssbj6erqQldXt1Y1EBERUd3V+qZV9vb2+Pe//4179+7h+vXrEAQB7dq1Q4sWLeqlIJVKhdLSUvTo0QPa2tqIiYmBr68vACAtLQ23b9+Gu7t7vZyLiIiIpKvTHTABoEWLFnBzc5N08uDgYAwZMgR2dna4f/8+du3ahbi
4OBw+fBhKpRKTJ09GUFAQzMzMYGJiAn9/f7i7u/ObHERERE1IncNEfcjLy8O7776L7OxsKJVKdO7cGYcPH8agQYMAAOHh4dDQ0ICvry9KS0vh7e2NjRs3ylly0xC7tGrbwODGr4OIiAgyh4nIyMinbtfT00NERAQiIiIaqSIiIiKqrTo9NZSIiIioEsMEERERScIwQURERJIwTBAREZEkDBNEREQkCcMEERERScIwQURERJLIep8JImrewo9crXFb4KD2jVgJEcmJIxNEREQkCcMEERERScIwQURERJIwTBAREZEkDBNEREQkCcMEERERScIwQURERJIwTBAREZEkDBNEREQkCcMEERERScIwQURERJIwTBAREZEkDBNEREQkCcMEERERScIwQURERJIwTBAREZEkDBNEREQkiZbcBRAR0fMLP3K1xm2Bg9o3YiVEf+HIBBEREUnCMEFERESSMEwQERGRJAwTREREJAnDBBEREUnCMEFERESSMEwQERGRJAwTREREJImsYWLp0qVwc3ODsbExWrdujVGjRiEtLU1tn5KSEkybNg3m5uYwMjKCr68vcnNzZaqYiIiIniRrmIiPj8e0adNw6tQpHDlyBGVlZRg8eDCKi4vFfQIDA3Hw4EF89913iI+PR1ZWFkaPHi1j1URERPQ4WW+nHR0drbYeFRWF1q1bIzk5Gf369UNBQQEiIyOxa9cueHp6AgC2bdsGZ2dnnDp1Cr1795ajbCIiInpMk5ozUVBQAAAwMzMDACQnJ6OsrAxeXl7iPk5OTrCzs0NiYmK1xygtLUVhYaHaQkRERA2nyYQJlUqFgIAAeHh4oGPHjgCAnJwc6OjowNTUVG1fCwsL5OTkVHucpUuXQqlUioutrW1Dl05ERPRSazJhYtq0abh06RL27Nkj6TjBwcEoKCgQl8zMzHqqkIiIiKrTJB5BPn36dBw6dAjHjx+HjY2N2G5paYmHDx8iPz9fbXQiNzcXlpaW1R5LV1cXurq6DV0yERER/X+yjkwIgoDp06dj3759OHbsGBwdHdW29+jRA9ra2oiJiRHb0tLScPv2bbi7uzd2uURERFQNWUcmpk2bhl27duHHH3+EsbGxOA9CqVRCX18fSqUSkydPRlBQEMzMzGBiYgJ/f3+4u7vzmxxERERNhKxhYtOmTQCAAQMGqLVv27YNkyZNAgCEh4dDQ0MDvr6+KC0thbe3NzZu3NjIlRIREVFNZA0TgiA8cx89PT1EREQgIiKiESoiIiKi2moy3+YgIiKi5olhgoiIiCRhmCAiIiJJGCaIiIhIEoYJIiIikoRhgoiIiCRhmCAiIiJJmsSzOYio9nrf3qLeEGv+6L8Dgxu/GCJ6qXFkgoiIiCRhmCAiIiJJGCaIiIhIEoYJIiIikoQTMInqKPzIVfS+/UeV9lPlVwEAgYPaN3ZJRI0u/MjVGrfx78DLgyMTREREJAnDBBEREUnCMEFERESSMEwQERGRJAwTREREJAnDBBEREUnCMEFERESSMEwQERGRJAwTREREJAnDBBEREUnCMEFERESSMEwQERGRJAwTREREJAnDBBEREUnCMEFERESSMEwQERGRJAwTREREJAnDBBEREUnCMEFERESSMEwQERGRJAwTREREJImsYeL48eMYPnw4rK2toVAosH//frXtgiBgwYIFsLKygr6+Pry8vHDt2jV5iiUiIqJqyRomiouL0aVLF0RERFS7fcWKFVi3bh02b96M06dPw9DQEN7e3igpKWnkSomIiKgmWnKefMiQIRgyZEi12wRBwJo1a/DZZ59h5MiRAIAdO3bAwsIC+/fvx7hx4xqzVCIiIqpBk50zkZGRgZycHHh5eYltSqUSvXr1QmJiooyVERER0eNkHZl4mpycHACAhYWFWruFhYW4rTqlpaUoLS0V1wsLCxumQCIiIgLQhEcm6mrp0qVQKpXiYmtrK3dJREREL7QmGyYsLS0BALm5uWrtubm54rbqBAcHo6C
gQFwyMzMbtE4iIqKXXZMNE46OjrC0tERMTIzYVlhYiNOnT8Pd3b3G1+nq6sLExERtISIiooYj65yJoqIiXL9+XVzPyMhASkoKzMzMYGdnh4CAACxatAjt2rWDo6Mj5s+fD2tra4waNUq+oomIiEiNrGHi7NmzGDhwoLgeFBQEAJg4cSKioqLwySefoLi4GB999BHy8/PRt29fREdHQ09PT66SiYiI6AmyhokBAwZAEIQatysUCoSFhSEsLKwRqyIiIqLaaLJzJoiIiKh5YJggIiIiSRgmiIiISBKGCSIiIpKEYYKIiIgkYZggIiIiSRgmiIiISBKGCSIiIpKEYYKIiIgkYZggIiIiSRgmiIiISBKGCSIiIpJE1gd9EdVW4o0/qrSdKr8KAAgc1L6xyyEiInBkgoiIiCRimCAiIiJJGCaIiIhIEoYJIiIikoRhgoiIiCRhmCAiIiJJGCaIiIhIEoYJIiIikoQ3rSIiIqqj8CNXa9z2Mt1IjyMTREREJAnDBBEREUnCMEFERESScM5EXcUurdo2MLjx6yAiIpIZRyaIiIhIEoYJIiIikoRhgoiIiCRhmCAiIiJJOAGT6izxxh/iz6fK1W/c8jLdrOWlFbsUvW//odZ0yu4jmYqhlwVvEtU0+4AjE0RERCQJwwQRERFJwjBBREREknDOBBE1O71vb6nS1mjzNR67YV3lnBHOFaGXXbMYmYiIiICDgwP09PTQq1cvnDlzRu6SiIiI6P9r8mFi7969CAoKQkhICM6dO4cuXbrA29sbeXl5cpdGREREaAZhYvXq1fjwww/x3nvvwcXFBZs3b4aBgQG+/vpruUsjIiIiNPE5Ew8fPkRycjKCg/96gJaGhga8vLyQmJhY7WtKS0tRWloqrhcUFAAACgsL67e44pKqbfV9jiZ+7uI//+rnkuKiJ8ppmHoeP+eT526oc9akpLio0et5vJ+fPHdh5Z9NI/4uPFnD4/U15Puvqd8b5Xegmr8DT/7+Aw33+1jduRr6nE/T2PW87O+/sc9ZeTxBEJ6+o9CE3blzRwAgnDx5Uq19zpw5wmuvvVbta0JCQgQAXLhw4cKFC5d6WjIzM5/6ed2kRybqIjg4GEFBQeK6SqXC3bt3YW5uDoVCIWNl9auwsBC2trbIzMyEiYmJ3OXIgn3APgDYBwD7AGAfAA3TB4Ig4P79+7C2tn7qfk06TLRs2RKamprIzc1Va8/NzYWlpWW1r9HV1YWurq5am6mpaUOVKDsTE5OX9i9OJfYB+wBgHwDsA4B9ANR/HyiVymfu06QnYOro6KBHjx6IiYkR21QqFWJiYuDu7i5jZURERFSpSY9MAEBQUBAmTpyInj174rXXXsOaNWtQXFyM9957T+7SiIiICM0gTIwdOxa///47FixYgJycHHTt2hXR0dGwsLCQuzRZ6erqIiQkpMolnZcJ+4B9ALAPAPYBwD4A5O0DhSA86/seRERERDVr0nMmiIiIqOljmCAiIiJJGCaIiIhIEoYJIiIikoRhopm5c+cO3nnnHZibm0NfXx+dOnXC2bNn5S6r0VRUVGD+/PlwdHSEvr4+Xn31VXz++efPvm98M3b8+HEMHz4c1tbWUCgU2L9/v9p2QRCwYMECWFlZQV9fH15eXrh27Zo8xTaQp/VBWVkZ5s6di06dOsHQ0BDW1tZ49913kZWVJV/BDeBZvwePmzJlChQKBdasWdNo9TWG5+mD1NRUjBgxAkqlEoaGhnBzc8Pt27cbv9gG8qw+KCoqwvTp02FjYwN9fX3xAZkNjWGiGbl37x48PDygra2Nn3/+GZcvX8aqVavQokULuUtrNMuXL8emTZuwYcMGpKamYvny5VixYgXWr18vd2kNpri4GF26dEFERES121esWIF169Zh8+bNOH36NAwNDeHt7Y2SkmoeCNdMPa0PHjx4gHPnzmH+/Pk4d+4cfvjhB6SlpWHEiBEyVNpwnvV7UGnfvn04derUM29/3Bw9qw/S09PRt29
fODk5IS4uDhcuXMD8+fOhp6fXyJU2nGf1QVBQEKKjo/HPf/4TqampCAgIwPTp03HgwIGGLaw+HshFjWPu3LlC37595S5DVkOHDhXef/99tbbRo0cLfn5+MlXUuAAI+/btE9dVKpVgaWkpfPHFF2Jbfn6+oKurK+zevVuGChvek31QnTNnzggAhFu3bjVOUY2spj747bffhFdeeUW4dOmSYG9vL4SHhzd6bY2luj4YO3as8M4778hTkAyq6wNXV1chLCxMra179+7Cp59+2qC1cGSiGTlw4AB69uyJt99+G61bt0a3bt2wdetWuctqVH369EFMTAyuXr0KADh//jx++eUXDBkyRObK5JGRkYGcnBx4eXmJbUqlEr169UJiYqKMlcmroKAACoXihX4uz5NUKhUmTJiAOXPmwNXVVe5yGp1KpcJPP/2E9u3bw9vbG61bt0avXr2eejnoRdSnTx8cOHAAd+7cgSAIiI2NxdWrVzF48OAGPS/DRDNy48YNbNq0Ce3atcPhw4cxdepUzJgxA9u3b5e7tEYzb948jBs3Dk5OTtDW1ka3bt0QEBAAPz8/uUuTRU5ODgBUuSOshYWFuO1lU1JSgrlz52L8+PEv1QOfli9fDi0tLcyYMUPuUmSRl5eHoqIiLFu2DD4+PvjPf/6DN998E6NHj0Z8fLzc5TWa9evXw8XFBTY2NtDR0YGPjw8iIiLQr1+/Bj1vk7+dNv1FpVKhZ8+eWLJkCQCgW7duuHTpEjZv3oyJEyfKXF3j+Pbbb7Fz507s2rULrq6uSElJQUBAAKytrV+aPqCalZWVYcyYMRAEAZs2bZK7nEaTnJyMtWvX4ty5c1AoFHKXIwuVSgUAGDlyJAIDAwEAXbt2xcmTJ7F582b0799fzvIazfr163Hq1CkcOHAA9vb2OH78OKZNmwZra2u1Ecz6xpGJZsTKygouLi5qbc7Ozi/UTOVnmTNnjjg60alTJ0yYMAGBgYFYunSp3KXJwtLSEgCQm5ur1p6bmytue1lUBolbt27hyJEjL9WoxIkTJ5CXlwc7OztoaWlBS0sLt27dwqxZs+Dg4CB3eY2iZcuW0NLSeqn/jfzzzz/xj3/8A6tXr8bw4cPRuXNnTJ8+HWPHjsXKlSsb9NwME82Ih4cH0tLS1NquXr0Ke3t7mSpqfA8ePICGhvqvraampvh/JS8bR0dHWFpaIiYmRmwrLCzE6dOn4e7uLmNljasySFy7dg1Hjx6Fubm53CU1qgkTJuDChQtISUkRF2tra8yZMweHDx+Wu7xGoaOjAzc3t5f638iysjKUlZXJ8m8kL3M0I4GBgejTpw+WLFmCMWPG4MyZM9iyZQu2bNkid2mNZvjw4Vi8eDHs7Ozg6uqK//73v1i9ejXef/99uUtrMEVFRbh+/bq4npGRgZSUFJiZmcHOzg4BAQFYtGgR2rVrB0dHR8yfPx/W1tYYNWqUfEXXs6f1gZWVFd566y2cO3cOhw4dQkVFhThfxMzMDDo6OnKVXa+e9XvwZIDS1taGpaUlOnTo0NilNphn9cGcOXMwduxY9OvXDwMHDkR0dDQOHjyIuLg4+YquZ8/qg/79+2POnDnQ19eHvb094uPjsWPHDqxevbphC2vQ74pQvTt48KDQsWNHQVdXV3BychK2bNkid0mNqrCwUJg5c6ZgZ2cn6OnpCW3atBE+/fRTobS0VO7SGkxsbKwAoMoyceJEQRAefT10/vz5goWFhaCrqyu8/vrrQlpamrxF17On9UFGRka12wAIsbGxcpdeb571e/CkF/Groc/TB5GRkULbtm0FPT09oUuXLsL+/fvlK7gBPKsPsrOzhUmTJgnW1taCnp6e0KFDB2HVqlWCSqVq0Lr4CHIiIiKShHMmiIiISBKGCSIiIpKEYYKIiIgkYZggIiIiSRgmiIiISBKGCSIiIpKEYYKIiIgkYZggItHNmzehUCiQkpIidymiK1euoHfv3tDT00PXrl3r9dgDBgxAQEBAvR6
T6GXEMEHUhEyaNAkKhQLLli1Ta9+/f/9L+zTIkJAQGBoaIi0tTe0ZJI9jKCCSF8MEUROjp6eH5cuX4969e3KXUm8ePnxY59emp6ejb9++sLe3f+ke4EXUXDBMEDUxXl5esLS0fOpj1RcuXFhlyH/NmjVqj5ueNGkSRo0ahSVLlsDCwgKmpqYICwtDeXk55syZAzMzM9jY2GDbtm1Vjn/lyhX06dMHenp66NixI+Lj49W2X7p0CUOGDIGRkREsLCwwYcIE/O9//xO3DxgwANOnT0dAQABatmwJb2/vat+HSqVCWFgYbGxsoKuri65duyI6OlrcrlAokJycjLCwMCgUCixcuLDKMSZNmoT4+HisXbsWCoUCCoUCN2/eBADEx8fjtddeg66uLqysrDBv3jyUl5fX2K8//fQTlEoldu7cCQDIzMzEmDFjYGpqCjMzM4wcOVI89uN9vHLlSlhZWcHc3BzTpk1DWVmZuM/GjRvRrl076OnpwcLCAm+99VaN5ydqrhgmiJoYTU1NLFmyBOvXr8dvv/0m6VjHjh1DVlYWjh8/jtWrVyMkJATDhg1DixYtcPr0aUyZMgV///vfq5xnzpw5mDVrFv773//C3d0dw4cPxx9//AEAyM/Ph6enJ7p164azZ88iOjoaubm5GDNmjNoxtm/fDh0dHSQkJGDz5s3V1rd27VqsWrUKK1euxIULF+Dt7Y0RI0bg2rVrAIDs7Gy4urpi1qxZyM7OxuzZs6s9hru7Oz788ENkZ2cjOzsbtra2uHPnDt544w24ubnh/Pnz2LRpEyIjI7Fo0aJqa9m1axfGjx+PnTt3ws/PD2VlZfD29oaxsTFOnDiBhIQEGBkZwcfHR22kJTY2Funp6YiNjcX27dsRFRWFqKgoAMDZs2cxY8YMhIWFIS0tDdHR0ejXr9/z/eERNScN+hgxIqqViRMnCiNHjhQEQRB69+4tvP/++4IgCMK+ffuEx/+6hoSECF26dFF7bXh4uGBvb692LHt7e6GiokJs69Chg/C3v/1NXC8vLxcMDQ2F3bt3C4IgiE/gXLZsmbhPWVmZYGNjIyxfvlwQBEH4/PPPhcGDB6udOzMzUwAgPq20f//+Qrdu3Z75fq2trYXFixertbm5uQkff/yxuN6lSxchJCTkqcfp37+/MHPmTLW2f/zjH0KHDh3UnpYYEREhGBkZiX1S+boNGzYISqVSiIuLE/f95ptvqry+tLRU0NfXFw4fPiwIwl99XF5eLu7z9ttvC2PHjhUEQRC+//57wcTERCgsLHxmXxA1Z1oyZxkiqsHy5cvh6elZ7f+NPy9XV1doaPw1AGlhYYGOHTuK65qamjA3N0deXp7a69zd3cWftbS00LNnT6SmpgIAzp8/j9jYWBgZGVU5X3p6Otq3bw8A6NGjx1NrKywsRFZWFjw8PNTaPTw8cP78+ed8hzVLTU2Fu7u72sRVDw8PFBUV4bfffoOdnR0A4F//+hfy8vKQkJAANzc3cd/z58/j+vXrMDY2VjtuSUkJ0tPTxXVXV1doamqK61ZWVrh48SIAYNCgQbC3t0ebNm3g4+MDHx8fvPnmmzAwMJD8/oiaEoYJoiaqX79+8Pb2RnBwMCZNmqS2TUNDA4IgqLU9fp2+kra2ttq6QqGotk2lUj13XUVFRRg+fDiWL19eZZuVlZX4s6Gh4XMfU07dunXDuXPn8PXXX6Nnz55i+CgqKkKPHj3E+ROPa9Wqlfjz0/rT2NgY586dQ1xcHP7zn/9gwYIFWLhwIZKSkmBqatpwb4qokXHOBFETtmzZMhw8eBCJiYlq7a1atUJOTo5aoKjPe0OcOnVK/Lm8vBzJyclwdnYGAHTv3h2//vorHBwc0LZtW7WlNgHCxMQE1tbWSEhIUGtPSEiAi4tLrerV0dFBRUWFWpuzszMSExPV+ighIQHGxsawsbER21599VXExsbixx9/hL+/v9jevXt3XLt2Da1bt67yPpVK5XPXpqWlBS8vL6xYsQI
XLlzAzZs3cezYsVq9P6KmjmGCqAnr1KkT/Pz8sG7dOrX2AQMG4Pfff8eKFSuQnp6OiIgI/Pzzz/V23oiICOzbtw9XrlzBtGnTcO/ePbz//vsAgGnTpuHu3bsYP348kpKSkJ6ejsOHD+O9996r8oH+LHPmzMHy5cuxd+9epKWlYd68eUhJScHMmTNrdRwHBwecPn0aN2/exP/+9z+oVCp8/PHHyMzMhL+/P65cuYIff/wRISEhCAoKUrv0AwDt27dHbGwsvv/+e/F+FX5+fmjZsiVGjhyJEydOICMjA3FxcZgxY8ZzT4w9dOgQ1q1bh5SUFNy6dQs7duyASqVChw4davX+iJo6hgmiJi4sLKzKZQhnZ2ds3LgRERER6NKlC86cOSNpbsWTli1bhmXLlqFLly745ZdfcODAAbRs2RIAxNGEiooKDB48GJ06dUJAQABMTU2rfEg/y4wZMxAUFIRZs2ahU6dOiI6OxoEDB9CuXbtaHWf27NnQ1NSEi4sLWrVqhdu3b+OVV17Bv//9b5w5cwZdunTBlClTMHnyZHz22WfVHqNDhw44duwYdu/ejVmzZsHAwADHjx+HnZ0dRo8eDWdnZ0yePBklJSUwMTF5rrpMTU3xww8/wNPTE87Ozti8eTN2794NV1fXWr0/oqZOITx54ZWIiIioFjgyQURERJIwTBAREZEkDBNEREQkCcMEERERScIwQURERJIwTBAREZEkDBNEREQkCcMEERERScIwQURERJIwTBAREZEkDBNEREQkCcMEERERSfL/ABpYuWSVEOx7AAAAAElFTkSuQmCC", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "_, ax = plt.subplots(figsize=(6, 4))\n", "\n", "dataset[\"text\"].str.split().str.len().plot(kind=\"hist\", bins=50, ax=ax, alpha=0.5, label=\"Before Tokenization\")\n", "dataset[\"tokens\"].str.split().str.len().plot(kind=\"hist\", bins=50, ax=ax, alpha=0.5, label=\"After Tokenization\")\n", "\n", "ax.set_xlabel(\"Number of tokens\")\n", "ax.set_ylabel(\"Count\")\n", "ax.legend()\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Vocabulary size" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Vocabulary size before tokenization: 947\n", "Vocabulary size after tokenization: 689\n" ] } ], "source": [ "print(f\"Vocabulary size before tokenization: {len(word_freq)}\")\n", "print(f\"Vocabulary size after tokenization: {len(token_freq)}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Vectorization\n", "\n", "The `count` vectorizer is a simple vectorizer that counts the number of times a token appears in a document. The `tfidf` vectorizer does the same as `count` but also weights the counts by the inverse document frequency, so tokens that appear in many documents contribute less. The `hashing` vectorizer is a memory-efficient vectorizer that uses a hash function to map tokens to features. Because it does not store the vocabulary in memory, it can vectorize large datasets."
] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# Define vectorizers\n", "vectorizers = {\n", " \"hashing\": _get_vectorizer(\"hashing\", n_features=2**20),\n", " \"count\": _get_vectorizer(\"count\", 20_000),\n", " \"tfidf\": _get_vectorizer(\"tfidf\", 20_000),\n", "}" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "hashing: (209, 1048576)\n", "count: (209, 82)\n", "tfidf: (209, 82)\n" ] } ], "source": [ "# Fit and vectorize the tokens\n", "token_list = dataset[\"tokens\"].str.split().tolist()\n", "X = {name: vectorizer.fit_transform(token_list) for name, vectorizer in vectorizers.items()}\n", "\n", "# Display the shape of the vectorized data\n", "for name, data in X.items():\n", " print(f\"{name}: {data.shape}\")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "address, allocate, asset, athlete, challenge, citizen, community, corruption, crucial, crucial role\n" ] } ], "source": [ "# Print the first 10 feature names from the count vectorizer (presumably tfidf shares this vocabulary - TODO confirm)\n", "features = vectorizers[\"count\"].get_feature_names_out()[:10]\n", "print(\", \".join(features))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAhMAAAHWCAYAAADNbgu+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAABhFElEQVR4nO3deXxM1/8/8NeErLIiEUtkkVBbCYqidkXVXrXVEkutsQTFRxMSsZRaaysNQWsrirYEjbW2WiLRUiESCWJpCBJElvP7I9/cX0YSZM7cLPJ6Ph7zeJg7N+97MiYz77n3nPdbI4QQICIiItKRQX4PgIiIiAo3JhNEREQkhckEERERSWEyQURERFKYTBAREZEUJhNEREQkhckEERERSWEyQURERFKK5/cA1JaWloY7d+7AwsICGo0mv4dDRERUaAgh8PTpU5QrVw4GBjmff3jnk4k7d+7AwcEhv4dBRERUaMXExKBChQo5Pv7OJxMWFhYA0p8IS0vLfB4NERFR4fHkyRM4ODgon6U5eeeTiYxLG5aWlkwmiIiIdPCmaQKcgElERERSmEwQERGRFCYTREREJIXJBBEREUlhMkFERERSmEwQERGRFCYTREREJIXJBBEREUlhMkFERERSmEwQERGRFCYTREREJIXJBBEREUl55xt9vY7TlN/fet+ouR1UHAkREVHhxTMTREREJIXJBBEREUlhMkFERERSmEwQERGRFCYTREREJIXJBBEREUlhMkFERERSmEwQERGRFCYTREREJIXJBBEREUlhMkFERERSmEwQERGRFCYTREREJIXJBBEREUlhMkFERERSmEwQERGRFCYTREREJIXJBBEREUlhMkFERERSmEwQERGRFCYTREREJIXJBBEREUlhMkFERERSmEwQERGRFCYTREREJIXJBBEREUlhMkFERERSmEwQERGRFCYTREREJIXJBBEREUlhMkFERERSmEwQERGRFCYTREREJIXJBBEREUlhMkFERERSmEwQERGRFCYTREREJCVfk4k5c+bggw8+gIWFBezs7NClSxdcvXpVa58XL15g1KhRKFWqFMzNzdG9e3fcu3cvn0ZMREREr8rXZOLo0aMYNWoUTp8+jYMHDyI5ORkff/wxEhMTlX3Gjx+PX3/9FT///DOOHj2KO3fuoFu3bvk4aiIiIsqseH4ePCgoSOt+YGAg7OzscP78eTRt2hSPHz9GQEAANm3ahJYtWwIA1q1bh6pVq+L06dNo2LBhfgybiIiIMilQcyYeP34MAChZsiQA4Pz580hOTkbr1q2Vfd577z1UrFgRp06dyjZGUlISnjx5onUjIiIi9RSYZCItLQ3jxo1D48aNUaNGDQDA3bt3YWRkBGtra619y5Qpg7t372YbZ86cObCyslJuDg4Oag+diIioSCswycSoUaPw999/Y8uWLVJxpk6disePHyu3mJgYPY2QiIiIspOvcyYyjB49Gr/99huOHTuGChUqKNvt7e3x8uVLxMfHa52duHfvHuzt7bONZWxsDGNjY7WHTERERP8nX89MCCEwevRo/PLLLzh06BCcnZ21Hq9bty4MDQ0RHBysbLt69Sqio6Px4Ycf5vVwiYiIKBv5emZi1KhR2LRpE3bv3g0LCwtlHoSVlRVMTU1hZWWFwYMHw8vLCyVLloSlpSU8PT3x4YcfciUHERFRAZGvycTKlSsBAM2bN9favm7dOgwcOBAAsGjRIhgYGKB79+5ISkpC27ZtsWLFijweKREREeUkX5MJIcQb9zExMcHy5cuxfPnyPBgRERER5VaBWc1BREREhROTCSIiIpLCZIKIiIikMJkgIiIiKUwmiIiISAqTCSIiIpLCZIKIiIikFIjeHO8ipym/52r/qLkdVBoJERGRunhmgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiI
iksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQUz+8BUO45Tfk9V/tHze2g0kiIiIh4ZoKIiIgk5WsycezYMXTs2BHlypWDRqPBrl27tB4fOHAgNBqN1q1du3b5M1giIiLKVr4mE4mJiahVqxaWL1+e4z7t2rVDbGysctu8eXMejpCIiIjeJF/nTLRv3x7t27d/7T7Gxsawt7fPoxERERFRbhX4ORNHjhyBnZ0dqlSpghEjRiAuLi6/h0RERESZFOjVHO3atUO3bt3g7OyMiIgI/O9//0P79u1x6tQpFCtWLNufSUpKQlJSknL/yZMneTVcIiKiIqlAJxO9evVS/l2zZk28//77qFSpEo4cOYJWrVpl+zNz5syBr69vXg2RiIioyNPpMseNGzf0PY634uLigtKlS+P69es57jN16lQ8fvxYucXExOThCImIiIoenZIJV1dXtGjRAj/++CNevHih7zHl6NatW4iLi0PZsmVz3MfY2BiWlpZaNyIiIlKPTsnEhQsX8P7778PLywv29vYYNmwY/vrrr1zHSUhIwMWLF3Hx4kUAQGRkJC5evIjo6GgkJCRg0qRJOH36NKKiohAcHIzOnTvD1dUVbdu21WXYREREpAKdkonatWtjyZIluHPnDtauXYvY2Fg0adIENWrUwMKFC/HgwYO3inPu3Dm4u7vD3d0dAODl5QV3d3f4+PigWLFiCAsLQ6dOnVC5cmUMHjwYdevWxfHjx2FsbKzLsImIiEgFUhMwixcvjm7duqFDhw5YsWIFpk6diokTJ+J///sfPv/8c3zzzTevvSTRvHlzCCFyfHz//v0ywyMiIqI8IFVn4ty5cxg5ciTKli2LhQsXYuLEiYiIiMDBgwdx584ddO7cWV/jJCIiogJKpzMTCxcuxLp163D16lV88skn2LBhAz755BMYGKTnJs7OzggMDISTk5M+x0pEREQFkE7JxMqVKzFo0CAMHDgwx8sYdnZ2CAgIkBocERERFXw6JRPXrl174z5GRkYYMGCALuGJiIioENFpzsS6devw888/Z9n+888/Y/369dKDIiIiosJDp2Rizpw5KF26dJbtdnZ2mD17tvSgiIiIqPDQKZmIjo6Gs7Nzlu2Ojo6Ijo6WHhQREREVHjolE3Z2dggLC8uyPTQ0FKVKlZIeFBERERUeOiUTvXv3xpgxY3D48GGkpqYiNTUVhw4dwtixY7U6fRIREdG7T6fVHDNnzkRUVBRatWqF4sXTQ6SlpaF///6cM0FERFTE6JRMGBkZYevWrZg5cyZCQ0NhamqKmjVrwtHRUd/jIyIiogJOqjdH5cqVUblyZX2NhYiIiAohnZKJ1NRUBAYGIjg4GPfv30daWprW44cOHdLL4IiIiKjg0ymZGDt2LAIDA9GhQwfUqFEDGo1G3+MiIiKiQkKnZGLLli3Ytm0bPvnkE32Ph4iIiAoZnZaGGhkZwdXVVd9jISIiokJIp2RiwoQJWLJkCYQQ+h4PERERFTI6Xeb4888/cfjwYezbtw/Vq1eHoaGh1uM7d+7Uy+CIiIio4NMpmbC2tkbXrl31PRYiIiIqhHRKJtatW6fvcRAREVEhpdOcCQBISUnBH3/8ge+//x5Pnz4FANy5cwcJCQl6GxwREREVfDqdmbh58ybatWuH6OhoJCUloU2bNrCwsMA333yDpKQkrFq1St/jJCIiogJKpzMTY8eORb169fDo0SOYmpoq27t27Yrg4GC9DY6IiIgKPp3OTBw/fhwnT56EkZGR1nYnJyfcvn1
bLwMjIiKiwkGnMxNpaWlITU3Nsv3WrVuwsLCQHhQREREVHjolEx9//DEWL16s3NdoNEhISMD06dNZYpuIiKiI0ekyx4IFC9C2bVtUq1YNL168QJ8+fXDt2jWULl0amzdv1vcYiYiIqADTKZmoUKECQkNDsWXLFoSFhSEhIQGDBw9G3759tSZkEhER0btPp2QCAIoXL44vvvhCn2MhIiKiQkinZGLDhg2vfbx///46DYaIiIgKH52SibFjx2rdT05OxrNnz2BkZAQzMzMmE0REREWITqs5Hj16pHVLSEjA1atX0aRJE07AJCIiKmJ07s3xKjc3N8ydOzfLWQsiIiJ6t+ktmQDSJ2XeuXNHnyGJiIiogNNpzsSePXu07gshEBsbi2XLlqFx48Z6GRgREREVDjolE126dNG6r9FoYGtri5YtW2LBggX6GBflE6cpv+dq/6i5HVQaCRERFRY6JRNpaWn6HgcREREVUnqdM0FERERFj05nJry8vN5634ULF+pyCCIiIiokdEomQkJCEBISguTkZFSpUgUAEB4ejmLFiqFOnTrKfhqNRj+jJCIiogJLp2SiY8eOsLCwwPr162FjYwMgvZCVh4cHPvroI0yYMEGvgyQiIqKCS6c5EwsWLMCcOXOURAIAbGxs4O/vz9UcRERERYxOycSTJ0/w4MGDLNsfPHiAp0+fSg+KiIiICg+dkomuXbvCw8MDO3fuxK1bt3Dr1i3s2LEDgwcPRrdu3fQ9RiIiIirAdJozsWrVKkycOBF9+vRBcnJyeqDixTF48GDMnz9frwMkIiKigk2nZMLMzAwrVqzA/PnzERERAQCoVKkSSpQoodfBERERUcEnVbQqNjYWsbGxcHNzQ4kSJSCE0Ne4iIiIqJDQKZmIi4tDq1atULlyZXzyySeIjY0FAAwePJjLQomIiIoYnZKJ8ePHw9DQENHR0TAzM1O29+zZE0FBQXobHBERERV8Os2ZOHDgAPbv348KFSpobXdzc8PNmzf1MjAiIiIqHHQ6M5GYmKh1RiLDw4cPYWxsLD0oIiIiKjx0SiY++ugjbNiwQbmv0WiQlpaGefPmoUWLFnobHBERERV8OiUT8+bNw+rVq9G+fXu8fPkSX331FWrUqIFjx47hm2++ees4x44dQ8eOHVGuXDloNBrs2rVL63EhBHx8fFC2bFmYmpqidevWuHbtmi5DJiIiIpXolEzUqFED4eHhaNKkCTp37ozExER069YNISEhqFSp0lvHSUxMRK1atbB8+fJsH583bx6WLl2KVatW4cyZMyhRogTatm2LFy9e6DJsIiIiUkGuJ2AmJyejXbt2WLVqFaZNmyZ18Pbt26N9+/bZPiaEwOLFi/H111+jc+fOAIANGzagTJky2LVrF3r16iV1bCIiItKPXJ+ZMDQ0RFhYmBpj0RIZGYm7d++idevWyjYrKys0aNAAp06dyvHnkpKS8OTJE60bERERqUenyxxffPEFAgIC9D0WLXfv3gUAlClTRmt7mTJllMeyM2fOHFhZWSk3BwcHVcdJRERU1OlUZyIlJQVr167FH3/8gbp162bpybFw4UK9DE4XU6dOhZeXl3L/yZMnTCiIiIhUlKtk4saNG3BycsLff/+NOnXqAADCw8O19tFoNHoZmL29PQDg3r17KFu2rLL93r17qF27do4/Z2xszFoXREREeShXyYSbmxtiY2Nx+PBhAOnls5cuXZrlUoQ+ODs7w97eHsHBwUry8OTJE5w5cwYjRozQ+/GIiIhIN7lKJl7tCrpv3z4kJibqfPCEhARcv35duR8ZGYmLFy+iZMmSqFixIsaNGwd/f3+4ubnB2dkZ3t7eKFeuHLp06aLzMYmIiEi/dJozkUG25fi5c+e0KmZmzHUYMGAAAgMD8dVXXyExMRFffvkl4uPj0aRJEwQFBcHExETquERERKQ/uUomNBpNljkRMnMkmjdv/tqERKPRwM/PD35+fjofg4iIiNSV68scAwcOVCY4vnj
xAsOHD8+ymmPnzp36GyEREREVaLlKJgYMGKB1/4svvtDrYIiIiKjwyVUysW7dOrXGQURERIWUThUwiYiIiDIwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKRIVcAkyi2nKb+/9b5RczuoOBIiItIXnpkgIiIiKUwmiIiISAqTCSIiIpLCZIKIiIikMJkgIiIiKVzNQe+M3KwUAbhahIhIX3hmgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiklI8vwdAVBg4Tfk9V/tHze2g0kiIiAoenpkgIiIiKUwmiIiISAqTCSIiIpLCZIKIiIikMJkgIiIiKUwmiIiISAqTCSIiIpLCZIKIiIikMJkgIiIiKayASZTPWF2TiAo7npkgIiIiKUwmiIiISAovcxC943JzGYWXUIhIF0wmiEhnnO9BRAAvcxAREZEkJhNEREQkhZc5iKhAUvMSCi/PEOkXz0wQERGRFJ6ZICLSM66goaKGZyaIiIhISoFOJmbMmAGNRqN1e++99/J7WERERJRJgb/MUb16dfzxxx/K/eLFC/yQiYiIipQC/8lcvHhx2Nvb5/cwiIgKBK5EoYKowCcT165dQ7ly5WBiYoIPP/wQc+bMQcWKFXPcPykpCUlJScr9J0+e5MUwiYgKPS7HJV0V6DkTDRo0QGBgIIKCgrBy5UpERkbio48+wtOnT3P8mTlz5sDKykq5OTg45OGIiYiIip4CfWaiffv2yr/ff/99NGjQAI6Ojti2bRsGDx6c7c9MnToVXl5eyv0nT54woSAiesepuRy3sJ6xycuzQQU6mXiVtbU1KleujOvXr+e4j7GxMYyNjfNwVEREREVbgb7M8aqEhARERESgbNmy+T0UIiIi+j8FOpmYOHEijh49iqioKJw8eRJdu3ZFsWLF0Lt37/weGhEREf2fAn2Z49atW+jduzfi4uJga2uLJk2a4PTp07C1tc3voREREdH/KdDJxJYtW/J7CERERPQGBfoyBxERERV8TCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQwmSAiIiIpTCaIiIhICpMJIiIiksJkgoiIiKQUimRi+fLlcHJygomJCRo0aIC//vorv4dERERE/6fAJxNbt26Fl5cXpk+fjgsXLqBWrVpo27Yt7t+/n99DIyIiIhSCZGLhwoUYOnQoPDw8UK1aNaxatQpmZmZYu3Ztfg+NiIiIABTP7wG8zsuXL3H+/HlMnTpV2WZgYIDWrVvj1KlT2f5MUlISkpKSlPuPHz8GADx58iTLvmlJz956LNn9/OvkJnZu4xfW2LmNz+dcPnZu4/M5l4+d2/h8zuVj5zY+n/O3j52xTQjx+h8WBdjt27cFAHHy5Emt7ZMmTRL169fP9memT58uAPDGG2+88cYbb3q6xcTEvPbzukCfmdDF1KlT4eXlpdxPS0vDw4cPUapUKWg0mjf+/JMnT+Dg4ICYmBhYWlrqdWyMnffxGTvv4zN23sdn7LyPX1RiCyHw9Ol
TlCtX7rX7FehkonTp0ihWrBju3buntf3evXuwt7fP9meMjY1hbGystc3a2jrXx7a0tFTlxc3Y+ROfsfM+PmPnfXzGzvv4RSG2lZXVG/cp0BMwjYyMULduXQQHByvb0tLSEBwcjA8//DAfR0ZEREQZCvSZCQDw8vLCgAEDUK9ePdSvXx+LFy9GYmIiPDw88ntoREREhEKQTPTs2RMPHjyAj48P7t69i9q1ayMoKAhlypRR5XjGxsaYPn16lksljK1ObLXjM3bex2fsvI/P2Hkfn7G1aYR403oPIiIiopwV6DkTREREVPAxmSAiIiIpTCaIiIhICpMJIiIiklLkk4kNGzZo9fLI8PLlS2zYsCEfRkRE9GbR0dHZ9ksQQiA6OjofRkQFkZ+fH549y9qj4/nz5/Dz89PbcYr8ao5ixYohNjYWdnZ2Wtvj4uJgZ2eH1NTUfBrZuy06OhoODg5ZSpwLIRATE4OKFSvm08heb9CgQViyZAksLCy0ticmJsLT05PdbAuB3DRKUrMqoyy+dxV+YWFhb73v+++/r9Mx8up1UuSTCQMDA9y7dw+2trZa20NDQ9GiRQs8fPgw1zH37Nnz1vt26tTprfft1q3bW++7c+fOt943O4mJiZg7dy6Cg4Nx//59pKWlaT1+48YNqfiF9Y0wp3H/999/sLe3R0pKivQxrl+/joiICDRt2hSmpqYQQrxVX5k3+fvvv1GjRo1sH9u1axe6dOkifYyXL18iMjISlSpVQvHiBbOMjYGBwRufz4znPLevw8x9gd5k4cKFuYr9qpzeu27evIlq1aohMTFRKn5ERATWrVuHiIgILFmyBHZ2dti3bx8qVqyI6tWrS8V2cXHB2bNnUapUKa3t8fHxqFOnjvT7i5pj37hxI1atWoXIyEicOnUKjo6OWLx4MZydndG5c+dcxcp4Leb0MZzxmC6vxczHyO51cujQIaWOkz4UzL/2PODu7g6NRgONRoNWrVppvfGlpqYiMjIS7dq10yn2274p5/YF8jb10fVlyJAhOHr0KPr164eyZcvq5cMss5w+IBMSEmBiYpLreGq/iT958gRCCKXpTeYxpqamYu/evVkSjNyKi4tDz549cejQIWg0Gly7dg0uLi4YPHgwbGxssGDBAqn4bdu2xZ9//glnZ2et7Tt27ED//v2lPnyePXsGT09PrF+/HgAQHh4OFxcXeHp6onz58pgyZYrOsbt374769etj8uTJWtvnzZuHs2fP4ueff85VvMOHD+s8ljcJCQl5q/1k/p4yXusajQbe3t4wMzNTHktNTcWZM2dQu3ZtneMDwNGjR9G+fXs0btwYx44dw6xZs2BnZ4fQ0FAEBARg+/btUvGjoqKyfe9LSkrC7du3pWKrOfaVK1fCx8cH48aNw6xZs5TfwdraGosXL851MhEZGanzWN7ExsZG+YyrXLmy1msuNTUVCQkJGD58uN6OV2STiYwP/IsXL6Jt27YwNzdXHjMyMoKTkxO6d++uU+xXv8Xry7p161SJm519+/bh999/R+PGjfUaV603wlffxC9cuICUlBRUqVIFQPqHW7FixVC3bl2dxm1tba31h/kqjUYDX19fnWJnGD9+PIoXL47o6GhUrVpV2d6zZ094eXlJJxNDhgxB69atceLECaVR3tatWzFo0CAEBgZKxZ46dSpCQ0Nx5MgRrSS8devWmDFjhlQycezYMcyYMSPL9vbt2+v0nDRr1kznsbyJmolKhozXuhACly5dgpGRkfKYkZERatWqhYkTJ0odY8qUKfD394eXl5fWJb2WLVti2bJlOsfNfNZ2//79Wl+QUlNTERwcDCcnJ53jA+qNHQC+++47rFmzBl26dMHcuXOV7fXq1dPpOXd0dJQaz+ssXrwYQggMGjQIvr6+Ws91xmecPntcFdlkYvr06UhNTYWTkxM+/vhjlC1bNr+HVKDY2NigZMmSeo+r1hth5jfxhQsXwsLCAuvXr4eNjQ0A4NGjR/Dw8MBHH32
k07gPHz4MIQRatmyJHTt2aD03RkZGcHR0fGOL3jc5cOAA9u/fjwoVKmhtd3Nzw82bN6ViA4Cvry8ePnyI1q1b49ixYwgKCsKQIUOwceNGnRPnDLt27cLWrVvRsGFDrW9A1atXR0REhFTshIQErddJBkNDw1zNf8hJfHw8AgICcOXKFQDpYx40aFCengnMjYzXuoeHB5YsWaLKvI5Lly5h06ZNWbbb2dnhv//+0zluxpc4jUaDAQMGaD1maGgIJycn6aRZrbED6WcS3N3ds2w3NjaWvqyU4fLly4iOjsbLly+1tufmkjgA5fl1dnZG48aNVb/sWGSTCSD9+vewYcOUNxG1JCYm4ujRo9m+QMaMGaNz3O3bt2Pbtm3Zxr1w4YLOcQFg5syZ8PHxwfr167XOHsjKizfCBQsW4MCBA0oiAaQnR/7+/vj4448xYcKEXMfM+DYbGRkJBwcHGBjofyFUYmJits/1w4cP9VZH/7vvvkPfvn3RsGFD3L59G5s3b871qdnsPHjwINvLPImJidKXyGrWrImtW7fCx8dHa/uWLVtQrVo1qdjnzp1D27ZtYWpqivr16wNIT0ZnzZqFAwcOoE6dOtLxc/oblZ3XpOaZSmtra8TGxma5JBYSEoLy5cvrHDfjrK2zszPOnj2L0qVLS40zO2qNHUgf98WLF7OcUQgKCtI6m6iLGzduoGvXrrh06ZLWPIqMvx9d50w0a9ZM1TkkClHE1a1bV/zxxx+qxb9w4YKwt7cXlpaWolixYsLW1lZoNBpRokQJ4ezsrHPcJUuWCHNzczF69GhhZGQkhg0bJlq3bi2srKzE//73P+lx165dW1hYWAhzc3NRo0YN4e7urnXTl2vXromgoCDx7NkzIYQQaWlp0jHNzc3F4cOHs2w/dOiQMDc3l47/6NEjsX//frFx40axfv16rZuM9u3bi6+//loIkf473LhxQ6SmpooePXqI7t276xRz9+7dWW7bt28XDg4OYvDgwVrbZXz00Udi6dKlWmMXQojRo0eLtm3bSsXes2ePKF68uOjfv78IDAwUgYGBol+/fqJ48eLil19+kYrdpEkTMXDgQJGcnKxsS05OFgMGDBAfffSRVOzNmzcLQ0ND8emnnwojIyPx6aefisqVKwsrKysxcOBAqdhCCJGQkCC+/vpr8eGHH4pKlSoJZ2dnrZuMCRMmiCZNmojY2FhhYWEhrl27Jv7880/h4uIiZsyYIT327Dx69EgvcdQc+5o1a0T58uXFli1bRIkSJcTmzZuFv7+/8m8Zn376qejcubN48OCBMDc3F5cvXxbHjx8X9evXF8eOHdM57pEjR4Spqalo3bq1MDIyEhEREUIIIebMmaPz+0p2inwysW/fPlG7dm3x66+/ijt37ojHjx9r3WQ1a9ZMDB06VKSmpgpzc3MREREhoqOjRdOmTcWOHTt0jlulShWxadMmIYRQ4gohhLe3txg1apT0uGfMmPHam6y4uDjRsmVLodFohIGBgTJ+Dw8P4eXlJRW7X79+wsnJSezYsUPExMSImJgYsX37duHs7Cz69+8vFXvPnj3CwsJCaDQaYWVlJaytrZWbjY2NVOxLly4JOzs70a5dO2FkZCQ+++wzUbVqVVGmTBlx/fp1nWJqNJq3uhkYGEiN/fjx48Lc3FwMHz5cmJiYiLFjx4o2bdqIEiVKiHPnzknFFkKI3377TTRq1EiYmZmJUqVKiRYtWogjR45IxzUxMRFXrlzJsv2ff/4RpqamUrFr1qwpli1bJoT4/3+jaWlpYujQocLHx0cqthBC9OrVS5QtW1Z89dVXYtGiRWLx4sVaNxlJSUliyJAhonjx4kKj0QhDQ0NhYGAgvvjiC5GSkiI99rlz54otW7Yo9z/77DOh0WhEuXLlxMWLF6Viqz32H3/8Ubi6uip/O+XLlxc//PCDdNxSpUqJ0NBQIYQQlpaW4t9//xVCCBEcHCxq166tc9yGDRuKBQsWCCG0PyvOnDkjypcvLzn
q/6/IJxOvvqFm3PTxBiuEEFZWVsqLwsrKSly+fFkIIcTp06dFlSpVdI5ramoqoqKihBBC2NraKn+A4eHhomTJkpKjVl+/fv1E27ZtRUxMjNYLPCgoSFSrVk0qdmJiohgxYoQwNjZW/j+NjIzEiBEjREJCglRsNzc3MXbsWJGYmCgVJyfx8fHC399f9OjRQ7Rv315MmzZN3LlzR5Vj6dv169fFkCFDxAcffCCqVq0q+vbtK8LCwvJ7WK9lZ2cn9u/fn2V7UFCQsLOzk4ptZmYmIiMjhRBClCxZUnkuLl++LOzt7aViC5H+fvLnn39Kx3mdmzdvit9//11s3bpVhIeH6y2uk5OTOHHihBBCiAMHDghra2uxf/9+MXjwYNGmTRu9HCM6OlqVsWdITEwU9+7d01s8a2tr5Yyei4uLOHTokBAi/e9KJrEtUaKEEjfze21kZKQwNjaWHPX/V6TnTADqz742NDRUrq/b2dkpM/WtrKwQExOjc1x7e3s8fPgQjo6OqFixIk6fPo1atWohMjIyxzXLujh//rzWxLTsJh/pQs3JhmZmZlixYgXmz5+vTP6rVKkSSpQoIRUXAG7fvo0xY8bodR5JZlZWVpg2bZoqsbMTHx8Pa2trvcSqVKkS1qxZo5dYeaVnz54YPHgwvv32WzRq1AgAcOLECUyaNAm9e/eWim1jY4OnT58CAMqXL4+///4bNWvWRHx8fLYVCXWJr8Yk6cwqVqyoSgG5u3fvwsHBAQDw22+/4fPPP8fHH38MJycnNGjQQCq2n58fJk6cCAcHB+UYQHrFx/nz52eZe5MbkZGRSElJgZubG8zMzJT3gWvXrikTSHVVo0YNhIaGwtnZGQ0aNMC8efNgZGSE1atXw8XFRee4as4hyazIJxNqLhMD0utZnD17Fm5ubmjWrBl8fHzw33//YePGjTkWEHobLVu2xJ49e+Du7g4PDw+MHz8e27dvx7lz53JV3Con9+/fR69evXDkyBHlwyY+Ph4tWrTAli1bshRAya28mGxYokQJnavG5aRt27Y4d+6c1B/368THx+Ovv/7KtlBY//79pWJ/8803cHJyQs+ePQEAPXr0wI4dO1C2bFns3bsXtWrV0jl2y5Yt0axZM0yfPl1r+6NHj9C9e3ccOnQoV/FKliyJ8PBwlC5dWlkvnxNdCstl+Pbbb6HRaNC/f3+l4JihoSFGjBihtfRPF02bNsXBgwdRs2ZN9OjRA2PHjsWhQ4dw8OBBtGrVSio2oP9J0nlZcMvGxgYxMTFwcHBAUFAQ/P39AaSv8pItWOfr64vhw4dneU6ePXsGX19fqWRi4MCBGDRoENzc3LS2nzlzBj/88AOOHDmic+yvv/5aWRHi5+eHTz/9FB999BFKlSqFrVu36hy3V69emDx5Mn7++WdoNBqkpaXhxIkTmDhxovR7SmZFvgImoO7SsHPnzuHp06do0aIF7t+/j/79++PkyZNwc3NDQECAzsVl0tLSkJaWpiz32bJlixJ32LBh2S6ly42ePXvixo0b2LBhgzJL+fLlyxgwYABcXV2xefNmqfiffPIJ6tati5kzZ8LCwgJhYWFwdHREr169kJaWJl0UR61Z9AEBAfDz84OHhwdq1qwJQ0NDrcdzu3wrs19//RV9+/ZFQkICLC0ttT5ANRqN1IcmkD4T/aeffkKjRo1w8OBBfP7559i6davyPB04cEDn2AYGBihVqhQaN26Mn376STkLdO/ePZQrVy7XHxDr169Hr169YGxsrBTCysmrSwx18ezZM62zWPr4cH748CFevHiBcuXKIS0tDfPmzVP+Rr/++mut1Ua6cHd3R0REBIQQcHJyyvJazO2KrhYtWrzVfhqNJtfJ4atGjx6N3377DW5ubggJCUFUVBTMzc2xZcsWzJs3T2o1mpoVHy0tLXHhwgW4urpqbb9+/Trq1auH+Ph4nWNn5+HDh29Mpt/k5cuXGDVqFAIDA5GamorixYsjNTUVffr0QWBgIIoVK6a
XsRb5ZCK7pWFnz57F8+fP9bI0rLCysrLCH3/8gQ8++EBr+19//YWPP/5Y+o/m77//RqtWrVCnTh0cOnQInTp1wj///IOHDx/ixIkTqFSpks6xt2zZgv79+6Nt27Y4cOAAPv74Y4SHh+PevXvo2rWr1JK61y0JlSl5CwCVK1fGJ598gtmzZ6tyGcXU1BTh4eFwcHDA2LFj8eLFC3z//fcIDw9HgwYN8OjRI51jGxgYICQkBMOGDUNiYiJ+/fVXODk56ZxM5Idbt24BQJZLbwXVm4qkvXqWqCBJTk7GkiVLEBMTg4EDByqXTxctWgQLCwsMGTIk1zEzPnQfP36cJRnPXPFx+fLlOo/bysoKR44cyXK59/z582jevLlyWasgiomJwaVLl5CQkAB3d3e4ubnh+fPnMDU11Uv8Ip9MfPTRR3B1dcWaNWuUb/kpKSkYMmQIbty4gWPHjknFz3yNLTNdrrGFhYWhRo0aMDAweGODGNnT+xYWFjh+/HiWMychISFo1qyZXooFPX78GMuWLUNoaCgSEhJQp04djBo1SrqA2Pvvv49hw4Zh1KhRsLCwUK5DDhs2DGXLlpWuVKmWEiVK4NKlS6pdQilXrhy2b9+ORo0aoUqVKvD390ePHj1w9epVfPDBB1L/pwYGBrh79y6srKzg4eGBgwcP4ueff0bVqlV1SibyqhlXWloa/P39sWDBAiQkJABIf+1PmDAB06ZNy3U9kSdPnijjedPvUJCbiGVWWJKs9evXKxUfFy9erErFx44dO8LU1BSbN29WvtGnpqaiZ8+eSExMxL59+3SOrVY/pDFjxmDp0qXZHu/TTz/V37xBvU3lLKTUXBomhBBNmzYVgYGBWbZv3LhRNGvWLFexNBqNMns4Y7WJGsv8hBCiU6dOomnTpuL27dvKtlu3bolmzZqJLl26SMfPmKmcnYzldLpSexZ9hufPn+stlhBCdO3aVWzdulWvMTMbNWqUcHR0FK1btxalSpUST58+FUKk10OQrR1iYGCgNbN95syZwtjYWPj4+Oj0enx1dVV2N3281qdMmSJsbW3FihUrRGhoqAgNDRXLly8Xtra2OtVryfw85PQ76OtvVIj02gxr1qwRU6ZMEXFxcUIIIc6fPy9u3bolFTc1NVX4+voKS0tLZdxWVlbCz89PpKam6mPoYsOGDaJx48aibNmyysq0RYsWiV27dknFPXLkiHj58qU+hpjFP//8I0qVKiUqVaokBg4cKAYOHCgqVaokbG1txaVLl6Riq7XU18XFJctS5ISEBNGkSRPRpEkTqTFnVuQnYFpaWiI6Ohrvvfee1vaYmJgsbaZ1ERISkm1/i4YNG2L06NG5ihUZGalcB1SzQQwALFu2DJ06dYKTk5MyIzomJgY1atTAjz/+KB2/W7du+OOPP7L0yliyZAm8vb0xatQonWOrOYs+NTUVs2fPxqpVq3Dv3j2loZW3tzecnJwwePBgnWN36NABkyZNwuXLl/U+HwNIP4Xs5OSEmJgYzJs3T+lHExsbi5EjR0rFFq+c4Pz6669RtWpVnecz5EWPCyD92+wPP/yg9dy+//77KF++PEaOHIlZs2blKt6hQ4eUFRZq/w5hYWFo3bo1rKysEBUVhaFDh6JkyZLYuXMnoqOjsWHDBp1jT5s2DQEBAZg7d67y/vXnn39ixowZePHiRa6fl1fpu2FWZpkn1b948SLLnCmZM0LVqlVDWFiYckbV1NQU/fv3x+jRo6VX1qjVD+nAgQP46KOPYGNjg3HjxuHp06do27YtihcvLnUmJQu9pSWFlKenp6hQoYLYsmWLiI6OFtHR0WLz5s2iQoUKYuzYsdLxLS0txYULF7JsP3funF6qMaopLS1NHDhwQCxdulQsXbpUHDx4UG+x16xZI2xtbbXOCn377bfC0tJSqtqbEEL07t1bKdLi5+cnbG1txZAhQ4Sjo6Po2rWrVGxfX1/h4uIifvzxR2Fqaqqs2d6yZYto2LChVGw1i0qpLSo
qKttvrJcuXcr2zFxBYWxsLK5evZpl+7///itMTEykYt+8eTPbiq5paWni5s2bUrGFEKJVq1Zi0qRJQgjt+gEnTpwQjo6OUrHLli2bbVXUXbt2iXLlyknFFkKIqlWrKtVLM4/90qVLolSpUlKxExMTxahRo4StrW22Z4YKKicnJ6UOkb6FhoaKkiVLiiVLloiGDRuKZs2aSdfceVWRTyaSkpLEmDFjhJGRkfJiMzY2FuPGjRMvXryQjv/pp5+KHj16aFVeS0lJEd27dxft2rXTOe7s2bNFQEBAlu0BAQFi7ty5OsfNS998840oX768iIyMFHPnzhWWlpZ6KcITFxenXJ5JTU0Vc+bMER07dhReXl7i4cOHUrErVaqklF/P/CZ45coVYW1tLTdwFezevVs55ZtdaW19ldNW28OHD8X8+fPFoEGDxKBBg8S3336rnNaXUb9+feHp6Zll++jRo0WDBg2kYr966SfDf//9p5cPNUtLS6UyaubXYlRUlHQxIjWTLCHSLy9nXNrIPPbw8HDp+CNHjhRVq1YV27dvF6ampmLt2rVi5syZokKFCuLHH3+UHrta5fQ3btwoPvvsM9UK4p08eVKUKFFCtGzZUmlfoE9FfgJmBjWWhgHpyymbNm0Ka2trpWPl8ePH8eTJExw6dEjnWhNOTk7YtGmTUmgnw5kzZ9CrVy+dLoMsXboUX375JUxMTLKdsJOZTIOyzCZPnoyAgACkpqZi3759aNiwoV7iqsXU1BT//vsvHB0dlcmdLi4uuHz5MurXr69M4isoMiZG2tnZ6X0lSrdu3RAYGAhLS8s31jaRWY577NgxdOzYEVZWVqhXrx6A9Nnz8fHx+PXXX9G0aVOdYx89ehQdOnRAxYoVlcl5p06dQkxMDPbu3atzl1kg5yWKN2/eRLVq1aS7TNrZ2WH//v1wd3fXei0ePHgQgwYNkiqK16BBAzRo0CDL+4CnpyfOnj2L06dPS429WrVqmDNnDjp37qw19u+++w7r1q2TWhpasWJFbNiwAc2bN9dayrlx40Zs3rwZe/fu1Tm2msu39bnU193dPdvlpDdv3oSdnZ3WCg7ZppAZivyciQxmZmZKcSZ9LstT6xrb3bt3s131YGtri9jYWJ1iLlq0CH379oWJiQkWLVqU434ajUanZCK7BKV8+fIwMzND06ZN8ddff+Gvv/4CIJ+sqNUlr1q1ajh+/HiWroHbt2/XqTqo2glcxozw5ORkNG/eHKtWrULlypVzHSc7VlZWyhvWq2+s+jRq1Cj07NkTK1eu1JpBP3LkSIwaNQqXLl3SOXazZs0QHh6O5cuX499//wWQniSNHDlS55byGcWfNBoNvL29td5PUlNTcebMGZ3ry2TWqVMn+Pn5Ydu2bcrxoqOjMXnyZOmW8vPnz8cnn3yCP/74I9skS5aXlxdGjRqFFy9eQAiBv/76C5s3b8acOXPwww8/SMV++PChsiLK0tJS+YBv0qQJRowYIRV7woQJGDRokCrLtzPasxe0WG9N7+c6Cpnk5GTx9ddfa81atrS0FNOmTVNtRrA+uLq6io0bN2bZvmHDBumOgWpxcnJ6q5vs+NXskrdr1y5hZWUl5s6dK8zMzMT8+fPFkCFDhJGRkThw4ECu4zk5OYn//vtP+bdaz4kQQpQuXVpcu3ZNOk5eMzExUfrbZKavU+761rx5c9G8eXOh0WhEo0aNlPvNmzcXH3/8sfjyyy/10isiPj5etG7dWlhbW4tixYoJBwcHYWhoKJo2bSp1Pfzly5eiZcuW4ujRo2LatGmiW7duolu3bmLatGlaq7tkqdUwq2bNmkoTuFatWokJEyYIIdI7Lcs2tjIzM1PeTwqDlJQUcfToUb11ZH2dIp9MDB8+XNjZ2YlVq1YpS8NWrVol7O3txfDhw3WKGRoaqkxGy4iZ001X33zzjShVqpRYu3atiIqKElFRUSIgIECUKlVKzJ49W+e4GXx9fbO9dvfs2TPh6+srHV9NanfJO3b
smGjdurWwtbUVpqamonHjxtk2iypoxo0bJyZPnqxK7BYtWmT7hvX48WPRokULqdiNGjXKttX4L7/8Ij2vQQj1roEPHDhQL52H3+T48eNi+fLl4ptvvtHbJOnSpUur0hxLiPQvcOvXrxd3794VQui/YdbChQvFkiVLhBBCHDx4UJiYmChN/2S7qaq9fFuI9Mn5GzduFBs3bsx28n5uGRsbK42+1FTkkwlLS0uxd+/eLNt///13YWlpqVPMvKgHkZaWJr766ithYmKinFExMzPT2we9mpPHXr58KVxcXFSbuZxXXfIKm9GjRwtLS0tRt25d8eWXX4rx48dr3WRkfs1ndu/ePVG8ePFcx8uccG/ZskVUrFhRzJ8/Xxw/flwcP35czJ8/Xzg5OWm1sdaFmi3l1RYdHa1abDUTTyG0ux6rLSoqSuzYsUPqy1uGH374QVSsWFFMnz5dbN++Xa+TmO/duydatGghNBqNsLGxETY2NkKj0YiWLVuK+/fv6xy3bt26yqRxNRX5ORPGxsbZVqF0dnbWub9FXtSD0Gg0+Oabb+Dt7Y0rV67A1NQUbm5uemuSJYTI9hp4aGio9HpqQ0NDvHjxQirG6+RVlzx9E0Jg+/btOHz4cLYV8GQmMQLpJcwzysOHh4drPabrfIfMlVgvX76Mu3fvKvdTU1MRFBSk03Neu3ZtaDQarfoVX331VZb9+vTpozQu04Wa18DVqmiYwcnJCU2aNMEXX3yBzz77TLrXR2YpKSlYu3atUgvm1Y67so2+6tevj5CQkCxzj9Tg6Oiot+MMHToUQHojrlfJltP39PTE06dP8c8//2TphzRmzBid+yH5+/tj4sSJmDlzZrb/l/qqxFrkV3P4+fnh33//xbp165QP4qSkJAwePBhubm7S9e2PHTuGRo0aKaW6M6SkpODkyZNSM9HVkBf17QFg9uzZCA8Pxw8//JDluZE1ceJEnDlzBj///DMqV66MCxcu4N69e+jfvz/69++f6//T3DTakZnNPXbsWHz//fdo0aIFypQpk+WYMj1F1GJgYKCMM7u3ElNTU3z33XcYNGhQruLmpg29zAeFmiXMe/fujaNHj6Jfv34oW7Zslv/PsWPHSsUPCQnBpk2bsGXLFjx48ADt2rXDF198gY4dO0p/qXhd0y99NPratm0bpk6divHjx2f7ASfTDiC7D/rMZLqGqkmtfkiZV3Flfg1mfGHUV9+cIp9MdO3aFcHBwTA2NlZaMIeGhuLly5dZ2gTr8s2wWLFiiI2NhZ2dndb2uLg42NnZ6fwfqda3nryobw/8/+fd3NwcNWvWzPJmIvMtPLsueSkpKejbt69OXfIyd62Mi4uDv78/2rZtqzXLff/+/fD29sb48eN1HnfJkiXx448/4pNPPtE5Rl67efMmhBBwcXHBX3/9pbUM0sjICHZ2dnrrSqiGbt26oVevXvj888/1Htva2lqVioavEkLgyJEj2LRpE3bs2IG0tDR069YNa9euVfW4MrJbppxxJkr2A+7VVVXJycmIjIxE8eLFUalSJb0thXzx4gVMTEz0EgtQrx/S0aNHX/t45oqhMop8MuHh4fHW++ryzTCntebh4eGoV6+ezi8Qtb/1HD16FI0aNcqy1llf3vS86+NbeHZd8mR1794dLVq0yFIKfdmyZfjjjz+wa9cunWM7Oztj3759WUq7F3VvKgvdv3//XMXbs2eP8u8HDx6o1lLe2dkZe/fuVU5Z54ULFy5g8ODBCAsLK9CdWt905knflz+ePHmCgQMHomvXrujXr5/OcdQsp9+5c2fEx8dj8+bNyrLk27dvo2/fvrCxscEvv/yic+y8UOSTiefPnyMtLU35ZhwVFYVdu3ahatWqaNu2rc5xM4r47N69G+3atdM67ZiamoqwsDBUqVIFQUFBOsXPq289gP7r26shY23/25C53mtubo6LFy/C1dVVa/v169dRu3ZtqaJV69evR1BQENauXau3tsBq2rNnD9q3bw9DQ0O
tD+jsyHwovzoXIDk5Gc+ePYORkRHMzMxyfWnpbTuByn5D/vHHH7F7926sX79elZbyGW7duoVNmzZh06ZN+Pvvv/Hhhx+ib9++GD58uGrHLIwuXbqEjh07IioqSucYfn5+WL9+Pfz8/DB06FD8/fffcHFxwdatW7F48WKcOnVK59gxMTHo1KkT/vnnnyz9kPbs2SPdtfXZs2eIjo7O8l4u22E6Q5GfgNm5c2d069YNw4cPR3x8PBo2bAhDQ0P8999/WLhwoc5FTjIuDwghYGFhofXhYGRkhIYNGyqTeXRhY2MjPRHydZ49e4avvvoK27ZtQ1xcXJbHC9q3npCQkLfaT7awUqlSpbB7925MmDBBa/vu3btRqlQpqdiff/45Nm/eDDs7O+kKeHmhS5cuSnXN1xXJkf1QfvToUZZt165dw4gRIzBp0qRcx3v1kqBaFixYgIiICJQpU0aV/8/vv/8emzZtwp9//omqVauib9++2L17d55MatSHjRs3YtWqVYiMjMSpU6fg6OiIxYsXw9nZWarRV04eP36Mx48fS8XYsGEDVq9ejVatWmkla7Vq1VKKnunKwcEBFy5cwB9//KHEqlq1Klq3bi0V98GDB/Dw8MixqZe+3suLfDJx4cIFpdrj9u3bUaZMGYSEhGDHjh3w8fHROZnIOE3v5OSEiRMnZpkTIGvmzJnw8fFR7VvPpEmTcPjwYaxcuRL9+vXD8uXLcfv2bXz//feYO3euXo6xfft2bNu2LdtsObdvtHnVZdLX1xdDhgzBkSNH0KBBAwDpJcyDgoKwZs0aqdgDBgzA+fPn8cUXX2Q7AbOgyfyhnFcf0Bnc3Nwwd+5cfPHFF9Jv4mpRuwqhv78/evfujaVLlyrzvQoLNbuGvlpJVgiB2NhYbNy4Ee3bt5ca9+3bt7OclQTSX//JyclSsYH0xLtNmzZo06aNdKwM48aNQ3x8PM6cOYPmzZvjl19+wb179+Dv748FCxbo7ThFvs6Eqamp0sGvR48eYsaMGUKI9DXcpqam+Tm016pdu7awsLAQ5ubmokaNGsLd3V3rJsvBwUEcPnxYCCGEhYWFUjlxw4YNon379tLxlyxZIszNzcXo0aOFkZGRGDZsmGjdurWwsrIS//vf/6Tjq+n06dOiT58+ynPdp08fcfr0aem4ZmZm4vjx43oYYd7KqJioVpGjnISEhAgLCwupGJ6enkqBo8y+++47vXQNVlNaWpo4duyY6Nu3r/jwww/FrVu3hBDpf6MF/XWkZtfQV6vHuri4iAYNGoipU6eKJ0+eSMWuU6eOUnk487h9fX1FkyZNch1vyZIl4vnz58q/X3fTlb29vThz5owQIv29PKOB2+7du0Xjxo11jvuqIn9mwtXVFbt27ULXrl2xf/9+ZTb+/fv3dZ4XUKdOHQQHB8PGxibHhisZdD3Vqfa3HjXr2wPAihUrsHr1avTu3RuBgYH46quv4OLiAh8fH6nllXmhQYMG+Omnn/Qe18HBocDNRXkbhoaGWvUm9O3V+Rji/75pLlu2THrO0I4dO7Kd79GoUSPMnTsXixcvloofHx+P7du3IyIiApMmTULJkiVx4cIFlClTRrrmyc6dO9GvXz/07dsXFy5cQFJSEoD00/mzZ8/WSw8NtURGRmbby8bY2Fi6AZpatX2A9GWlAwYMwO3bt5GWloadO3fi6tWr2LBhA3777bdcx1O7HxKQvvIvYzWhjY0NHjx4gMqVK6NmzZp6vXRa5JMJHx8f9OnTB+PHj0erVq2U5X4HDhzQqXETkD4PI2PCpVof+rL1L97ExcUFkZGRqFixIt577z1s27YN9evXx6+//qo0RJMRHR2tdDw1NTXF06dPAQD9+vVDw4YNsWzZMuljqCE6Ovq1j1esWFHn2AsWLMBXX32FVatWZVtIrSD74osvEBAQoLdLYJm9+jek0Whga2uLli1bSp+mjYuL01r+nMHS0hL//fefVOywsDC0bt0aVlZWiIqKwtChQ1GyZEn
s3LkT0dHRb1yl8ib+/v5YtWoV+vfvjy1btijbGzduDH9/f6nYanN2dsbFixezzO8ICgqSXv0yaNAgLFmyBBYWFlrbExMT4enpKbVktnPnzvj111/h5+eHEiVKwMfHB3Xq1MGvv/6q06WJzImPWklQlSpVcPXqVTg5OaFWrVr4/vvv4eTkhFWrVmXbLFJnejvHUYjFxsaKCxcuKP00hEjv43DlypV8HFX+UrO+vRBCODs7K3Xn69atK1atWiWEEGL//v0FuoxxRhn0nG4yrK2thZGRkTAwMBDm5uZKSd2MW0GmZqnuzFJTU7X+TmVVr15dfPfdd1m2L126VFStWlUqdqtWrcSkSZOEENqnxE+cOCEcHR2lYguRfok2MjIyS/yIiIgCXzZ+zZo1onz58mLLli2iRIkSYvPmzcLf31/5t4ycWgE8ePBAFCtWTCq2mtTqh7Rx40axbt06IUR634/SpUsLAwMDYWJiIl2OPrMif2YCAOzt7WFvb6+1rX79+vk0mreTmpqKRYsW5TiBUfZSQebiS61bt8a///6L8+fPw9XVVS9LiVq2bIk9e/bA3d0dHh4eGD9+PLZv345z584py2oLoldXjSQnJyMkJAQLFy7ErFmzpGLLnlLPT2qU6s4sICAAixYtwrVr1wCkT8AcN24chgwZIhXXy8sLo0ePxoMHD9CyZUsAQHBwML799lssWbJEKvbZs2fx/fffZ9levnx5rbLjurK3t8f169eznMX6888/VanoqU9DhgyBqakpvv76azx79gx9+vRBuXLlsGTJEvTq1UunmE+ePIFI7zeFp0+fahWUSk1Nxd69e7MUDyxIfH19MXz48CwT6p89ewZfX1+dK3d+8cUXyr/r1q2Lmzdv4t9//0XFihVRunRpqTFr0VtaQoqMJkFvc9OVt7e3KFu2rPj222+FiYmJmDlzphg8eLAoVaqU1GSdvJKamiqSk5OV+1u2bBGenp5i6dKlBbr1e05+++030axZM51//uXLl8LDwyNPuvsVNt7e3qJEiRJiypQpSkOlKVOmCHNzc+Ht7S0df8WKFaJ8+fJKAz4XFxfpjqFCCGFra6ucfct85uDAgQOiQoUK0vFnz54tqlWrJk6fPi0sLCzE8ePHxY8//ihsbW3F0qVLpePnFX11DX3TWcNixYoJf39/VY8hGzu7hl7BwcGidOnSUrHzQpEvWqWGzOWX32TAgAE6HaNSpUpYunQpOnToAAsLC1y8eFHZdvr0aWzatEmnuBnGjBkDV1fXLJN+li1bhuvXr+vlW/SLFy8QFhaWpRy4RqNBx44dpePnpevXr6NWrVpSk8esrKxw8eLFLA3KCptbt24BgHSRnQy2trZYunQpevfurbV98+bN8PT0lJrb8Pz5cwghYGZmhgcPHuDevXs4ePAgqlWrJlW0Dkj/9h0XF4dt27ahZMmSCAsLQ7FixdClSxc0bdpU+m9ICIHZs2djzpw5ePbsGYD0CYwZTZ0KspYtW2Lnzp1Z5l89efIEXbp00an3x9GjRyGEQMuWLbFjxw6tOjxGRkZwdHRUKkvqavfu3Vr3M85Mrl+/Hr6+vjpVwFS7H5JQuYFg5gNRIWRmZqYsabW3txfnz58XQqRfL9W1dXpm5cqVE+fOncuy/fz586J8+fLS8fft2ydKly6t99bsanv8+LHWLT4+Xly5ckX07NlT1KpVSyp2//79xcKFC/Uz0DyWmpoqfH19haWlpfItzcrKSvj5+UnPcbCyssp22enVq1eFlZWVVOw2bdqIlStXCiGEePTokShTpoyoUKGCMDExEStWrJCKHR8fL1q3bi2sra1FsWLFhIODgzA0NBRNmzYVCQkJUrEzS0pKEv/88484c+aMePr0qd7iqknfLeszi4qKEmlpaVIxcuunn34SnTp10ulnAwMDxbp164RGoxFLliwRgYGBym3Tpk3i5MmTUmMbM2aMMDY2Fu3atRMDBgwQAwcO1LrpC+dM5IHU1FTs2rULV65
cAQBUr14dnTp1kmqAVKFCBcTGxqJixYqoVKkSDhw4gDp16uDs2bN6aUOu5ix3IL3d7ueffw4fHx+UKVNGOl5esba2zjIHQAgBBwcHrRn1unBzc4Ofnx9OnDiRbSdFXZeG5YVp06Ypqzkylmv++eefmDFjBl68eCE1n6Rfv35YuXJlljLoq1evRt++faXGrVbROiD9TNPBgwdx4sQJhIaGIiEhAXXq1JGuaPgqIyMjVKtWTa8x1aJWy/rMHB0dcfz4cXz//fe4ceMGfv75Z5QvXx4bN26Es7MzmjRpIhU/Ow0bNsSXX36p089mnJ12dnZWpR/Sxo0bsXPnTvUbCOotLaFsXbt2Tbi5uQkzMzOlyJGZmZmoUqWKuH79us5xJ0+eLGbNmiWESJ9vULx4ceHq6iqMjIzE5MmTpcet5ix3IdKLp8j8/vnlyJEjWrdjx46JK1euaM3/0NWrxXYy35ydnfUwevWULVtW7N69O8v2Xbt2iXLlyknFzlgpUr16dTF48GAxePBgUaNGDWFpaSlGjx4ttWokr4vWPXr0SO8xC5PMcw6yOytpZmYmAgICpI6xfft2YWpqKoYMGSKMjY2VuSrfffedXgruverZs2di7NixonLlynqL+fz58yxnQXXl5OSUJysTmUyorH379qJdu3YiLi5O2fbff/+Jdu3aiU8++URvxzl58qRYsGCB2LNnj17iBQQECFNTU+Hj46N8cHp7ewszMzOxevVq6fgeHh7ihx9+0MNI89bRo0ezTRySk5PF0aNH82FEBYOxsbFSWS+zf//9V5iYmEjFbt68+VvdWrRokevYNWvWFEuWLBHR0dHC0tJSOaV87tw5UaZMGalxz507V2vpXY8ePYSBgYEoV66cuHjxolTswioqKkpERkYKjUYjzp49K6KiopTbnTt3REpKivQxateurUygzTzx9cKFC9L/p69Ors+4hGVhYZFtMp0biYmJYtSoUcLW1lavkzsDAwNFr169xLNnz6TG9yacgKmyEiVK4PTp06hZs6bW9tDQUDRu3Fiqy6TaVq5ciVmzZuHOnTsA0vuMzJgxI9ctn7Pz7Nkz9OjRA7a2ttm2fi6op/SLFSuG2NjYLEvM4uLiYGdnp5emOS9fvkRkZCQqVaqE4sULx5XIBg0aoEGDBln6Inh6euLs2bM4ffp0Po3s9bZv344+ffogNTUVrVq1woEDBwAAc+bMwbFjx3JsjvQ2nJ2d8dNPP6FRo0Y4ePAgPv/8c2zdulVZzp1xLNIvMzMzXL58GU5OTrCwsEBoaChcXFxw48YNVKtWDS9evNA5dmBgoNZlTgMDA9ja2qJBgwZZutvm1qhRo3D48GHMnDkz235Iul7Se/78Obp27YoTJ06o2kCwcLxTFWLGxsZKdcfMEhISYGRkJBX72rVrOc7Q1XVNcmYjRozAiBEj8ODBA5iamsLc3Fw6ZobNmzfjwIEDMDExwZEjR7T+QGVKx6pNCJFt3YS4uDjpZm7Pnj2Dp6enshooPDwcLi4u8PT0RPny5TFlyhSp+GqaN28eOnTogD/++EOpInvq1CnExMQU6LLOn332GZo0aYLY2FitZlmtWrVC165dpWLfvXtXaSX922+/4fPPP8fHH38MJycnpUlcUabW+5ea9TcGDhwo9fOv8+uvv2LDhg1o3rw5PDw88NFHH8HV1RWOjo746aefdE4m8qqBIJMJlX366af48ssvERAQoBTCOnPmDIYPH45OnTrpHHfNmjUYMWIESpcuDXt7+ywfxvpIJjLY2trqLVaGadOmwdfXF1OmTIGBgYHe4+tbRiEtjUaDgQMHak1yTU1NRVhYmFIeXFdTp05FaGgojhw5gnbt2inbW7dujRkzZhToZKJZs2YIDw/H8uXLlS6e3bp1w8iRI6WX46lNraJ1NjY2iImJgYODA4KCgpQS10IIvbV9LqzUfP8aOnQoxo4di7Vr10Kj0eDOnTs4deoUJk6cCG9v71zHy03fGZmCfmr1Q/r
999+xf/9+VSaeZsZkQmVLly7FgAED8OGHHyqnl5KTk9G5c2epCnv+/v6YNWsWJk+erK+hanF2dn5tBnvjxg2p+C9fvkTPnj0LRSIBQFnZIoSAhYUFTE1NlceMjIzQsGFDDB06VOoYu3btwtatW9GwYUOt57569eqIiIiQip0XypUrJ10F9F3SrVs39OnTB25uboiLi1PaX4eEhGTbxrooUfP9a8qUKUhLS0OrVq3w7NkzNG3aVKm/4enpmet4tWvXhkajwZtmBGg0GqkkUa1+SHnVQJDJhMqsra2xe/duXL9+HZcvXwYAVKtWTfrN5NGjR+jRo4c+hpitcePGad3PKM4SFBSESZMmSccfMGAAtm7div/973/SsfLCunXrAKSfpZkxY4ZS8jYqKgq7du1C1apVpUvTPnjwINtyv4mJiaqdmpQRFhaGGjVqwMDA4I3f3vRRgr2wWbRoEZycnBATE4N58+YplwljY2MxcuTIfB5d/lLz/Uuj0WDatGmYNGkSrl+/joSEBFSrVk3ny7RqdiHNzMPDA6GhoWjWrBmmTJmCjh07YtmyZUhOTs6yLDo38qqBICdg5gE1+goMHjwYH3zwAYYPH66vYb6V5cuX49y5c8qHq67GjBmDDRs2oFatWnj//fezTAqS+eNRU5s2bdC9e3cMHz4c8fHxeO+992BoaIj//vsPCxculDod2bRpU/To0QOenp6wsLBAWFgYnJ2d4enpiWvXriEoKEiPv4k8AwMD3L17F3Z2djAwMMjx25vsNzZ69+TV+1dMTAwAKHNXdFGnTh0EBwfDxsYGfn5+mDhxYpb+GWq4efOmXvoh2djY4NmzZ0hJSYGZmVmW91rZPk4ZeGZCZT4+Pli4cCE8PT21JqaNHz8e0dHR8PPz0ymuq6srvL29lZUiebUaon379pg6dap0MnHp0iWlxfvff/+t9VhB/BaeISQkRCmDrO8iR7Nnz0b79u1x+fJlpKSkYMmSJbh8+TJOnjyJo0eP6uk30J/IyEhlPk1efXsrTNavX4/SpUujQ4cOAICvvvoKq1evRrVq1bB58+Ys7beLEjXfv1JSUuDr64ulS5cqq+XMzc3h6emJ6dOn57oo1JUrV5CYmAgbG5scm3GpwdHRUS+vkbxqIMgzEypTq6/A6/o3aDQa6TkNOZk3bx5WrFiBqKgoVeIXdGZmZkrHvc8//xzVq1fH9OnTERMTgypVqig9EnQVERGBuXPnalVMnDx5cpalxQVJcnIyhg0bBm9v70LfV0SfqlSpgpUrV6Jly5Y4deoUWrdujUWLFuG3335D8eLF9dcToRBS8/1rxIgR2LlzJ/z8/LS+wM2YMQNdunTBypUrcxXvww8/hLm5OZo0aQJfX19MnDgxx0smMhNH86IfkpqYTKjM2toaZ8+ehZubm9b28PBw1K9fH/Hx8fkzsDdwd3fXOkMghMDdu3fx4MEDrFixQufSsYXd+++/jyFDhqBr166oUaMGgoKC8OGHH+L8+fPo0KGDXlpLF0bvSpMyfcqceE6ePBmxsbHYsGED/vnnHzRv3hwPHjzI7yG+k6ysrLBlyxZlwmuGvXv3onfv3nj8+HGu4l29ehXTp09HREQELly4gGrVqmVb/0Wj0UjVbChfvjz27NmDunXram2/cOECOnXqpDTQK6h4mUNlavYVUFOXLl207mcUZ2nevDnee++9/BlUAeDj44M+ffpg/PjxaNWqlfLN58CBA8plm9x48uTJW++bFzOyddWlSxfs2rUL48ePz++hFBjm5uaIi4tDxYoVceDAAXh5eQEATExM8Pz583weXd7z8vLCzJkzUaJECeW5yI5Go8GCBQt0Po6xsXG2Ew2dnZ11qu1TpUoVpe+OgYEBgoODs50oLUvtfkhqYzKRBwICAnDgwAE0bNgQQHqdiejoaPTv31/rj+pNkw7f9o/xbWK9yfTp06V+/l2l7yJH2TUOy0lBnsRYmJuUqaVNmzYYMmQI3N3dER4erjRa+ue
ff4rkfImQkBAkJycr/86J7Jyp0aNHY+bMmVi3bp1SDyYpKQmzZs3C6NGjpWK/WlxLn1xdXREUFJRljPv27ZMutpUXeJlDZS1atHir/TQaDQ4dOvTafUqWLInw8HCULl36tXHfJlZ23pVvyYVJ5omVUVFRmDJlCgYOHKh1rXf9+vWYM2eO0l2wIMqvOTwFWXx8PLy9vRETE4MRI0agbdu2ANITdSMjI0ybNi2fR/hu6tq1K4KDg2FsbKwk/KGhoXj58iVatWqlta8u81bUqty5du1ajB49GpMmTULLli0BAMHBwViwYAEWL14sXcdGbUwmCpHMS/FcXFxw9uxZlCpVSq/x34VvyYVVq1atMGTIkCyTdTdt2oTVq1fjyJEj+TOwXMp4SynIq3LyyrFjx5RW2Nu3b0f58uWxYcMGuLi4qF6RsKjy8PB4631zuyrtTZU7ZftcqNkPSW1MJgqRUqVKYe/evWjQoAEMDAxw7949vZa6fle+JRdWZmZmCA0NzXaybu3ataVXiqhNjXoqhdmOHTvQr18/9O3bFxs3bsTly5fh4uKCZcuWYe/evQW6Z0lhJYRATEwMbG1ttarU6oujoyNGjhypWuXhDLL9kDLK/78Nfa0q4pyJQqR79+5o1qwZypYtC41Gg3r16qFYsWLZ7qvLaeVmzZop//bz88PChQu1viV36tQJNWvWxOrVq5lMqMDBwQFr1qzBvHnztLb/8MMPUkV38oJa9VQKM39/f6xatQr9+/dXJvABQOPGjZU+HaRfQgi4urrin3/+yZKU64PalYczyH5JzG4ip9p4ZqKQCQoKwvXr1zFmzBj4+fnBwsIi2/3Gjh0rdZzC/i25MNq7dy+6d+8OV1dXpavkX3/9hWvXrmHHjh3KBL6CSK16KoWZmq2wKWfVq1dHQECAMuFdn9Ss3Kl2PyS18cxEIZPRTfL8+fMYO3ZsjsmErML8Lbmw+uSTT3Dt2jWsWLFC6bzZsWNHDB8+vMA/58nJyahXr16W7XXr1kVKSko+jCj/qdkKm3I2d+5cTJo0CStXrkSNGjX0GlvNyp1q90NSG89MULZy+pYcHh6OnTt3FuhvyZT3PD09YWhomGVJ8sSJE/H8+XMsX748n0aWf+bMmYMff/wRa9euRZs2bbB3717cvHkT48ePh7e3t04dLOnNMveiMDIyyjJ3QqYXRX6sWtJHP6Tt27dj27ZtiI6OxsuXL7Uek500moHJBOXo1q1bWLlyJa5cuQIAqFq1aqH4llyYxcfH46+//sp22VlBntHt6emJDRs2wMHBIdt6Kpm/wRXUJm76JoTA7NmzMWfOHOWyYEYr7JkzZ+bz6N5d69evf+3jhW2+140bN1C7du1cLd3PbOnSpZg2bRoGDhyI1atXw8PDAxERETh79ixGjRqFWbNm6WWcTCYoR8ePH8eqVau0lrVt3LgRzs7OXNamgl9//RV9+/ZFQkICLC0tsyw701d3PzXos57Ku+bly5d6aYVN+SOvKnfmRLYf0nvvvYfp06ejd+/eWnN3fHx88PDhQyxbtkwv4+ScCcpW5mVtISEhSEpKAgA8fvwYs2fP5rI2FUyYMAGDBg3C7Nmz86QroT4dPnw4v4dQYBkZGaFatWr5PYwiJSIiAuvWrUNERASWLFkCOzs77Nu3DxUrVkT16tVzFSuvKne+qR+SrqKjo9GoUSMAgKmpKZ4+fQogvdVDw4YNmUyQurisLe/dvn0bY8aMKXSJBFFBcvToUbRv3x6NGzfGsWPHMGvWLNjZ2SE0NBQBAQHYvn17ruJlTpTVTJrV6odkb2+Phw8fwtHRERUrVsTp06dRq1YtREZGQp8XJphMULauXr2Kpk2bZtluZWVVYDudFnZt27bFuXPnONOfSMKUKVPg7+8PLy8vrdVuLVu21Nu3cDWo1Q+pZcuW2LNnD9zd3eHh4YHx48dj+/btOHfuXK6KW70JkwnKFpe15b0OHTpg0qRJuHz5crbLzjp16pRPIyMqPC5
duoRNmzZl2W5nZ1fg6p3kRT+k1atXK5O5R40ahVKlSuHkyZPo1KkThg0bplPM7DCZoGwNHToUY8eOxdq1a6HRaHDnzh2cOnUKEydOhLe3d34P752U0cgnu2qRGo2G/VCI3oK1tTViY2OzLOMMCQlB+fLl82lU2cuLrsEGBgYwMDBQ7vfq1Qu9evXSKdbrMJmgbE2ZMgVpaWlo1aoVnj17hqZNmyrL2rg+Xh1qtjcmKip69eqFyZMn4+eff4ZGo0FaWhpOnDiBiRMnFrjl1ZnnYLypH1JuhIWFoUaNGjAwMEBYWNhr933//fdzP/BscGkovRaXteWd1/Wv0Gg0PCNE9BZevnyJUaNGITAwEKmpqShevDhSUlLQt29fBAYG5tjPKL/ps2tw5g7TGd2gs/uo1+cZTyYTRAWEu7u71v3k5GRERkaiePHiqFSpkt4q1REVBTExMbh06RISExPh7u4OV1fX/B7Sa+mzH9LNmzdRsWJFaDQa3Lx587X7Ojo66jTeV/EyB1EBkd0a9idPnmDgwIHo2rVrPoyIqHAKCAjAokWLcO3aNQCAm5sbxo0bhyFDhuTzyHKmz35ImRMEfSULb8IzE0QF3KVLl9CxY0edK+ARFSU+Pj5YuHAhPD09teYeLFu2DOPHj3/t5cT8pFY/pDlz5qBMmTIYNGiQ1va1a9fiwYMHmDx5svTYASYTRAXen3/+iY4dO+LRo0f5PRSiAs/W1hZLly7NMvdg8+bN8PT0LHDLQzNTox+Sk5MTNm3apFTBzHDmzBn06tULkZGRUmPOwMscRAXE0qVLte4LIRAbG4uNGzeiffv2+TQqosIlOTkZ9erVy7K9bt26SElJyYcRvb3IyEhERUUhNjZWb/2Q7t69i7Jly2bZbmtri9jYWNkhK5hMEBUQixYt0rqfUU53wIABmDp1aj6Niqhw6devH1auXJmlO+3q1avRt2/ffBrVm6nVD8nBwQEnTpzIUnfjxIkTKFeunPS4MzCZICog9HW6kaioCwgIwIEDB9CwYUMA6af0o6Oj0b9/f63On68mHPlJrX5IQ4cOxbhx45CcnIyWLVsCAIKDg/HVV19hwoQJ0uPOwGSCiIjeGX///Tfq1KkDIL17KACULl0apUuXxt9//63sJ9vlU9/U6oc0adIkxMXFYeTIkXj58iUAwMTEBJMnT9brGU8mE0RE9M5Qs7OnmtTqh6TRaPDNN9/A29sbV65cgampKdzc3GBsbCw5Ym1MJoiIiPKZ2v2QzM3N8cEHH+hhpNljMkFERJTP1OqHlJiYiLlz5yI4OBj379/P0gPoxo0bskMHwDoTREREBYa++yH17t0bR48eRb9+/VC2bNksc0XGjh0rFT8DkwkiIqJ3lLW1NX7//Xc0btxY1eMYvHkXIiIiKoxsbGxQsmRJ1Y/DZIKIiOgdNXPmTPj4+OSq66gueJmDiIjoHeXu7o6IiAgIIeDk5ARDQ0Otxy9cuKCX43A1BxER0TuqS5cueXIcnpkgIiIiKZwzQURERFJ4mYOIiOgdlZqaikWLFmHbtm2Ijo5W+nNkePjwoV6OwzMTRERE7yhfX18sXLgQPXv2xOPHj+Hl5YVu3brBwMAAM2bM0NtxOGeCiIjoHVWpUiUsXboUHTp0gIWFBS5evKhsO336NDZt2qSX4/DMBBER0Tvq7t27qFmzJoD0Zl+PHz8GAHz66af4/fff9XYcJhNERETvqAoVKiA2NhZA+lmKAwcOAADOnj2r1zbkTCaIiIjeUV27dkVwcDAAwNPTE97e3nBzc0P//v0xaNAgvR2HcyaIiIiKiFOnTuHUqVNwc3NDx44d9RaXyQQRERFJYZ0JIiKid9i1a9dw+PBh3L9/H2lpaVqP+fj46OUYPDNBRET0jlqzZg1GjBiB0qVLw97eHhqNRnlMo9HordEXkwkiIqJ3lKOjI0aOHInJkyerehwmE0RERO8oS0tLXLx4ES4uLqoeh0tDiYiI3lE9evRQakuoiRMwiYi
I3lGurq7w9vbG6dOnUbNmTRgaGmo9PmbMGL0ch5c5iIiI3lHOzs45PqbRaHDjxg29HIfJBBEREUnhnAkiIiKSwjkTRERE7xAvLy/MnDkTJUqUgJeX12v3XbhwoV6OyWSCiIjoHRISEoLk5GQAwIULF7QKVWWW03ZdcM4EERHROyQsLAw1atSAgUHezWTgnAkiIqJ3iLu7O/777z8AgIuLC+Li4lQ/JpMJIiKid4i1tTUiIyMBAFFRUVmae6mBcyaIiIjeId27d0ezZs1QtmxZaDQa1KtXD8WKFct2X33VmWAyQURE9A5ZvXo1unXrhuvXr2PMmDEYOnQoLCwsVD0mJ2ASERG9ozw8PLB06VImE0RERFSwcQImERERSWEyQURERFKYTBAREZEUJhNEREQkhckEEUnRaDSvvc2YMSO/h0hEKmOdCSKSEhsbq/x769at8PHxwdWrV5Vt5ubmuYqXnJwMQ0NDvY2PiNTHMxNEJMXe3l65WVlZQaPRaG3bsmULqlatChMTE7z33ntYsWKF8rNRUVHQaDTYunUrmjVrBhMTE/z0008YOHAgunTpgtmzZ6NMmTKwtraGn58fUlJSMGnSJJQsWRIVKlTAunXr8vE3J6IMPDNBRKr56aef4OPjg2XLlsHd3R0hISEYOnQoSpQogQEDBij7TZkyBQsWLIC7uztMTExw5MgRHDp0CBUqVMCxY8dw4sQJDB48GCdPnkTTpk1x5swZbN26FcOGDUObNm1QoUKFfPwtiYhFq4hIbwIDAzFu3DjEx8cDAFxdXTFz5kz07t1b2cff3x979+7FyZMnERUVBWdnZyxevBhjx45V9hk4cCCOHDmCGzduKG2U33vvPdjZ2eHYsWMAgNTUVFhZWeGHH35Ar1698u6XJKIseGaCiFSRmJiIiIgIDB48GEOHDlW2p6SkwMrKSmvfevXqZfn56tWrK4kEAJQpUwY1atRQ7hcrVgylSpXC/fv3VRg9EeUGkwkiUkVCQgIAYM2aNWjQoIHWY692MCxRokSWn391EqZGo8l2W160Vyai12MyQUSqKFOmDMqVK4cbN26gb9+++T0cIlIRkwkiUo2vry/GjBkDKysrtGvXDklJSTh37hwePXoELy+v/B4eEekJkwkiUs2QIUNgZmaG+fPnY9KkSShRogRq1qyJcePG5ffQiEiPuJqDiIiIpLBoFREREUlhMkFERERSmEwQERGRFCYTREREJIXJBBEREUlhMkFERERSmEwQERGRFCYTREREJIXJBBEREUlhMkFERERSmEwQERGRFCYTREREJOX/AaRHScusilDJAAAAAElFTkSuQmCC", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Plot top 20 features of tfidf vectorizer\n", "features = vectorizers[\"tfidf\"].get_feature_names_out()\n", "terms = pd.DataFrame(X[\"tfidf\"].toarray(), columns=features)\n", "\n", "_, ax = plt.subplots(figsize=(6, 4))\n", "terms.sum().sort_values(ascending=False).head(20).plot(kind=\"bar\", ax=ax)\n", "ax.set_xlabel(\"Term\")\n", "ax.set_ylabel(\"Frequency\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Classification" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# Define classifiers\n", "classifiers = [\n", " (LogisticRegression(max_iter=1000, random_state=SEED), {\"C\": np.logspace(-3, 3, 20)}),\n", " (SVC(random_state=SEED), {\"C\": np.logspace(-3, 3, 20), \"gamma\": np.logspace(-3, 3, 20)}),\n", " (KNeighborsClassifier(), {\"n_neighbors\": np.arange(1, 12)}),\n", " (DecisionTreeClassifier(random_state=SEED), {\"max_depth\": np.arange(1, 12)}),\n", " (RandomForestClassifier(random_state=SEED), {\"n_estimators\": np.arange(10, 500, 50)}),\n", " (AdaBoostClassifier(algorithm=\"SAMME\", random_state=SEED), {\"n_estimators\": np.arange(10, 500, 50)}),\n", " (GradientBoostingClassifier(random_state=SEED), {\"n_estimators\": np.arange(100, 500, 25)}),\n", " (\n", " VotingClassifier(\n", " estimators=[\n", " (\"lr\", LogisticRegression(max_iter=1000, random_state=SEED)),\n", " (\"svc\", SVC(random_state=SEED)),\n", " (\"rf\", RandomForestClassifier(random_state=SEED)),\n", " ],\n", " ),\n", " {\n", " \"lr__C\": np.logspace(-3, 3, 20),\n", " \"svc__C\": np.logspace(-3, 3, 20),\n", " \"svc__gamma\": np.logspace(-3, 3, 20),\n", " \"rf__n_estimators\": np.arange(10, 500, 50),\n", " },\n", " ),\n", " (\n", " MLPClassifier(hidden_layer_sizes=(20, 10), max_iter=1000, random_state=SEED),\n", " {\"learning_rate_init\": np.logspace(-3, 0, 20), \"batch_size\": [32, 64, 128]},\n", " ),\n", "]" ] }, { "cell_type": "code", 
"execution_count": 19, "metadata": {}, "outputs": [], "source": [ "# Split the data into training and testing sets (using TF-IDF vectorizer)\n", "X_train, X_test, y_train, y_test = train_test_split(X[\"tfidf\"], dataset[\"sentiment\"], test_size=0.2, random_state=SEED)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# Define the cross-validation strategy\n", "kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b7251857c51f41699ab2a750855875ac", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/9 [00:00 np.ndarray:\n", " group_names = [\"True Neg\", \"False Pos\", \"False Neg\", \"True Pos\"]\n", " group_percentages = [f\"{value:.2%}\" for value in cm.flatten() / cm.sum()]\n", " labels = [f\"{v1}\\n{v2}\" for v1, v2 in zip(group_names, group_percentages)]\n", " return np.asarray(labels).reshape(2, 2)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ab4d2fe86927402bbcb49bcd0e19eca7", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/9 [00:00" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = plt.figure(figsize=(20, 20))\n", "N_COLS = 3\n", "N_ROWS = len(estimators) // N_COLS\n", "\n", "results = pd.DataFrame(columns=[\"Classifier\", \"Accuracy\", \"Precision\", \"Recall\", \"F1\", \"ROC AUC\", \"Time\", \"Memory\"])\n", "for i, (clf, tim, mem) in enumerate(tqdm(estimators, unit=\"clf\")):\n", " name = clf.__class__.__name__\n", "\n", " # Predict\n", " y_pred = clf.predict(X_test)\n", "\n", " # Calculate metrics\n", " accuracy = accuracy_score(y_test, y_pred)\n", " precision = precision_score(y_test, y_pred)\n", " recall = recall_score(y_test, y_pred)\n", " f1 = f1_score(y_test, y_pred)\n", " 
roc_auc = roc_auc_score(y_test, y_pred)\n", " cm = confusion_matrix(y_test, y_pred)\n", "\n", " # Plot confusion matrix\n", " ax = plt.subplot(N_ROWS, N_COLS, i + 1, aspect=\"equal\")\n", " ax.grid(False)\n", " ax.set_title(f\"{name} | Accuracy: {accuracy:.2%}\")\n", "\n", " labels = [\"Negative\", \"Positive\"]\n", " sns.heatmap(\n", " cm,\n", " xticklabels=labels if i // N_COLS == N_ROWS - 1 else [],\n", " yticklabels=labels if i % N_COLS == 0 else [],\n", " annot=cm_annotations(cm),\n", " square=True,\n", " cbar=False,\n", " cmap=\"viridis\",\n", " linewidths=0.5,\n", " fmt=\"\",\n", " ax=ax,\n", " )\n", "\n", " # Save results\n", " results.loc[i] = [name, accuracy, precision, recall, f1, roc_auc, tim, mem // 1024]\n", "\n", "plt.tight_layout()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "# Sort the results by accuracy\n", "results = results.sort_values(\"Accuracy\", ascending=False).reset_index(drop=True)\n", "\n", "# Save the results to CSV\n", "output_results = results.copy()\n", "output_results.columns = output_results.columns.str.lower().str.replace(\" \", \"_\")\n", "output_results = output_results.rename(columns={\"time\": \"time_seconds\", \"memory\": \"memory_kb\"})\n", "output_results.to_csv(DATA_DIR / \"proto_results.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 ClassifierAccuracyPrecisionRecallF1ROC AUCTimeMemory
0RandomForestClassifier80.95%78.26%85.71%81.82%80.95%97.70s504KB
1VotingClassifier76.19%76.19%76.19%76.19%76.19%69.08s1288KB
2LogisticRegression73.81%72.73%76.19%74.42%73.81%0.96s310KB
3SVC71.43%73.68%66.67%70.00%71.43%0.56s244KB
4MLPClassifier71.43%73.68%66.67%70.00%71.43%20.38s1208KB
5AdaBoostClassifier69.05%65.38%80.95%72.34%69.05%93.89s643KB
6GradientBoostingClassifier69.05%65.38%80.95%72.34%69.05%46.88s899KB
7KNeighborsClassifier64.29%68.75%52.38%59.46%64.29%0.46s293KB
8DecisionTreeClassifier61.90%57.14%95.24%71.43%61.90%0.53s324KB
\n" ], "text/plain": [ "" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Display the results\n", "results.style.format(\n", " {\n", " \"Accuracy\": \"{:.2%}\",\n", " \"Precision\": \"{:.2%}\",\n", " \"Recall\": \"{:.2%}\",\n", " \"F1\": \"{:.2%}\",\n", " \"ROC AUC\": \"{:.2%}\",\n", " \"Time\": \"{:.2f}s\",\n", " \"Memory\": \"{}KB\",\n", " },\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 2 }