{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"import tqdm\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns;\n",
"\n",
"from sklearn.datasets import fetch_20newsgroups\n",
"from sklearn.manifold import TSNE\n",
"from pycaret.anomaly import *\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Read the encoded tweet table, then discard the 'Unnamed: 0' column\n",
"# (presumably a row index saved by an earlier to_csv call - verify).\n",
"# NOTE(review): the path below is an absolute, machine-specific location.\n",
"embeding_df = (\n",
"    pd.read_csv('/mnt/c/Users/selin_uzturk/Desktop/sinkaf/encoded.csv')\n",
"    .drop(columns=['Unnamed: 0'])\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
" ... | \n",
" 56 | \n",
" 57 | \n",
" 58 | \n",
" 59 | \n",
" 60 | \n",
" 61 | \n",
" 62 | \n",
" 63 | \n",
" labels | \n",
" tweet | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 101 | \n",
" 10110 | \n",
" 175 | \n",
" 78653 | \n",
" 189 | \n",
" 25285 | \n",
" 15976 | \n",
" 40840 | \n",
" 276 | \n",
" 31623 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" en güzel uyuyan insan ödülü jeon jungkook'a g... | \n",
"
\n",
" \n",
" 1 | \n",
" 101 | \n",
" 11589 | \n",
" 10706 | \n",
" 10713 | \n",
" 10794 | \n",
" 94698 | \n",
" 30668 | \n",
" 24883 | \n",
" 117 | \n",
" 23763 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" Mekanı cennet olsun, saygılar sayın avukatımı... | \n",
"
\n",
" \n",
" 2 | \n",
" 101 | \n",
" 148 | \n",
" 30471 | \n",
" 10774 | \n",
" 13785 | \n",
" 13779 | \n",
" 33642 | \n",
" 14399 | \n",
" 48271 | \n",
" 76686 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" Kızlar aranızda kas yığını beylere düşenler ol... | \n",
"
\n",
" \n",
" 3 | \n",
" 101 | \n",
" 19319 | \n",
" 16724 | \n",
" 10118 | \n",
" 10107 | \n",
" 78323 | \n",
" 12407 | \n",
" 38959 | \n",
" 22934 | \n",
" 10147 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" Biraz ders çalışayım. Tembellik ve uyku düşman... | \n",
"
\n",
" \n",
" 4 | \n",
" 101 | \n",
" 30932 | \n",
" 58706 | \n",
" 58054 | \n",
" 44907 | \n",
" 10224 | \n",
" 106583 | \n",
" 10288 | \n",
" 12524 | \n",
" 13878 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" Trezeguet yerine El Sharawy daha iyi olmaz mı | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 43344 | \n",
" 101 | \n",
" 20065 | \n",
" 10161 | \n",
" 115 | \n",
" 115 | \n",
" 103784 | \n",
" 10774 | \n",
" 21388 | \n",
" 10245 | \n",
" 92067 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" Hil**adamlar kesinlikle kelimeleri anlamıyorla... | \n",
"
\n",
" \n",
" 43345 | \n",
" 101 | \n",
" 139 | \n",
" 80839 | \n",
" 24109 | \n",
" 13406 | \n",
" 18985 | \n",
" 16285 | \n",
" 10163 | \n",
" 11062 | \n",
" 276 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" Böyle piçlerin çok erken ölmemelerini ve çok f... | \n",
"
\n",
" \n",
" 43346 | \n",
" 101 | \n",
" 105549 | \n",
" 102635 | \n",
" 10140 | \n",
" 26943 | \n",
" 11499 | \n",
" 110516 | \n",
" 21899 | \n",
" 11861 | \n",
" 10561 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" Turgay denilen bu holigonda bir sorun yok, gur... | \n",
"
\n",
" \n",
" 43347 | \n",
" 101 | \n",
" 81424 | \n",
" 26398 | \n",
" 92017 | \n",
" 109620 | \n",
" 10941 | \n",
" 76010 | \n",
" 10115 | \n",
" 19830 | \n",
" 26083 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" Umarım ülkenin düşük zekadan kurtulması ilgile... | \n",
"
\n",
" \n",
" 43348 | \n",
" 101 | \n",
" 39774 | \n",
" 11127 | \n",
" 45989 | \n",
" 24596 | \n",
" 11933 | \n",
" 170 | \n",
" 17145 | \n",
" 10710 | \n",
" 39125 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" CHP sandıkları bırakmaz, üzerine oturur, bir c... | \n",
"
\n",
" \n",
"
\n",
"
43349 rows × 66 columns
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 8 \\\n",
"0 101 10110 175 78653 189 25285 15976 40840 276 \n",
"1 101 11589 10706 10713 10794 94698 30668 24883 117 \n",
"2 101 148 30471 10774 13785 13779 33642 14399 48271 \n",
"3 101 19319 16724 10118 10107 78323 12407 38959 22934 \n",
"4 101 30932 58706 58054 44907 10224 106583 10288 12524 \n",
"... ... ... ... ... ... ... ... ... ... \n",
"43344 101 20065 10161 115 115 103784 10774 21388 10245 \n",
"43345 101 139 80839 24109 13406 18985 16285 10163 11062 \n",
"43346 101 105549 102635 10140 26943 11499 110516 21899 11861 \n",
"43347 101 81424 26398 92017 109620 10941 76010 10115 19830 \n",
"43348 101 39774 11127 45989 24596 11933 170 17145 10710 \n",
"\n",
" 9 ... 56 57 58 59 60 61 62 63 labels \\\n",
"0 31623 ... 0 0 0 0 0 0 0 0 0 \n",
"1 23763 ... 0 0 0 0 0 0 0 0 0 \n",
"2 76686 ... 0 0 0 0 0 0 0 0 0 \n",
"3 10147 ... 0 0 0 0 0 0 0 0 0 \n",
"4 13878 ... 0 0 0 0 0 0 0 0 0 \n",
"... ... ... .. .. .. .. .. .. .. .. ... \n",
"43344 92067 ... 0 0 0 0 0 0 0 0 1 \n",
"43345 276 ... 0 0 0 0 0 0 0 0 1 \n",
"43346 10561 ... 0 0 0 0 0 0 0 0 1 \n",
"43347 26083 ... 0 0 0 0 0 0 0 0 1 \n",
"43348 39125 ... 0 0 0 0 0 0 0 0 1 \n",
"\n",
" tweet \n",
"0 en güzel uyuyan insan ödülü jeon jungkook'a g... \n",
"1 Mekanı cennet olsun, saygılar sayın avukatımı... \n",
"2 Kızlar aranızda kas yığını beylere düşenler ol... \n",
"3 Biraz ders çalışayım. Tembellik ve uyku düşman... \n",
"4 Trezeguet yerine El Sharawy daha iyi olmaz mı \n",
"... ... \n",
"43344 Hil**adamlar kesinlikle kelimeleri anlamıyorla... \n",
"43345 Böyle piçlerin çok erken ölmemelerini ve çok f... \n",
"43346 Turgay denilen bu holigonda bir sorun yok, gur... \n",
"43347 Umarım ülkenin düşük zekadan kurtulması ilgile... \n",
"43348 CHP sandıkları bırakmaz, üzerine oturur, bir c... \n",
"\n",
"[43349 rows x 66 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"embeding_df"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" Description | \n",
" Value | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Session id | \n",
" 5272 | \n",
"
\n",
" \n",
" 1 | \n",
" Original data shape | \n",
" (43349, 66) | \n",
"
\n",
" \n",
" 2 | \n",
" Transformed data shape | \n",
" (43349, 65) | \n",
"
\n",
" \n",
" 3 | \n",
" Ignore features | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" Numeric features | \n",
" 65 | \n",
"
\n",
" \n",
" 5 | \n",
" Preprocess | \n",
" True | \n",
"
\n",
" \n",
" 6 | \n",
" Imputation type | \n",
" simple | \n",
"
\n",
" \n",
" 7 | \n",
" Numeric imputation | \n",
" mean | \n",
"
\n",
" \n",
" 8 | \n",
" Categorical imputation | \n",
" mode | \n",
"
\n",
" \n",
" 9 | \n",
" CPU Jobs | \n",
" -1 | \n",
"
\n",
" \n",
" 10 | \n",
" Use GPU | \n",
" False | \n",
"
\n",
" \n",
" 11 | \n",
" Log Experiment | \n",
" False | \n",
"
\n",
" \n",
" 12 | \n",
" Experiment Name | \n",
" anomaly-default-name | \n",
"
\n",
" \n",
" 13 | \n",
" USI | \n",
" ca74 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Initialise the PyCaret anomaly-detection experiment on the encoded table.\n",
"# Only the raw 'tweet' text is excluded; every remaining column is treated\n",
"# as a numeric feature, and that includes 'labels'.\n",
"# NOTE(review): 'labels' looks like the ground-truth class - confirm it is\n",
"# really meant to be a detector input rather than an ignored column.\n",
"# session_id pins PyCaret's random seed so a fresh run is reproducible;\n",
"# 5272 is the id PyCaret auto-generated for the run recorded below.\n",
"ano1 = setup(embeding_df, ignore_features=['tweet'], session_id=5272)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Name | \n",
" Reference | \n",
"
\n",
" \n",
" ID | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" abod | \n",
" Angle-base Outlier Detection | \n",
" pyod.models.abod.ABOD | \n",
"
\n",
" \n",
" cluster | \n",
" Clustering-Based Local Outlier | \n",
" pycaret.internal.patches.pyod.CBLOFForceToDouble | \n",
"
\n",
" \n",
" cof | \n",
" Connectivity-Based Local Outlier | \n",
" pyod.models.cof.COF | \n",
"
\n",
" \n",
" iforest | \n",
" Isolation Forest | \n",
" pyod.models.iforest.IForest | \n",
"
\n",
" \n",
" histogram | \n",
" Histogram-based Outlier Detection | \n",
" pyod.models.hbos.HBOS | \n",
"
\n",
" \n",
" knn | \n",
" K-Nearest Neighbors Detector | \n",
" pyod.models.knn.KNN | \n",
"
\n",
" \n",
" lof | \n",
" Local Outlier Factor | \n",
" pyod.models.lof.LOF | \n",
"
\n",
" \n",
" svm | \n",
" One-class SVM detector | \n",
" pyod.models.ocsvm.OCSVM | \n",
"
\n",
" \n",
" pca | \n",
" Principal Component Analysis | \n",
" pyod.models.pca.PCA | \n",
"
\n",
" \n",
" mcd | \n",
" Minimum Covariance Determinant | \n",
" pyod.models.mcd.MCD | \n",
"
\n",
" \n",
" sod | \n",
" Subspace Outlier Detection | \n",
" pyod.models.sod.SOD | \n",
"
\n",
" \n",
" sos | \n",
" Stochastic Outlier Selection | \n",
" pyod.models.sos.SOS | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Name \\\n",
"ID \n",
"abod Angle-base Outlier Detection \n",
"cluster Clustering-Based Local Outlier \n",
"cof Connectivity-Based Local Outlier \n",
"iforest Isolation Forest \n",
"histogram Histogram-based Outlier Detection \n",
"knn K-Nearest Neighbors Detector \n",
"lof Local Outlier Factor \n",
"svm One-class SVM detector \n",
"pca Principal Component Analysis \n",
"mcd Minimum Covariance Determinant \n",
"sod Subspace Outlier Detection \n",
"sos Stochastic Outlier Selection \n",
"\n",
" Reference \n",
"ID \n",
"abod pyod.models.abod.ABOD \n",
"cluster pycaret.internal.patches.pyod.CBLOFForceToDouble \n",
"cof pyod.models.cof.COF \n",
"iforest pyod.models.iforest.IForest \n",
"histogram pyod.models.hbos.HBOS \n",
"knn pyod.models.knn.KNN \n",
"lof pyod.models.lof.LOF \n",
"svm pyod.models.ocsvm.OCSVM \n",
"pca pyod.models.pca.PCA \n",
"mcd pyod.models.mcd.MCD \n",
"sod pyod.models.sod.SOD \n",
"sos pyod.models.sos.SOS "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"models()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Isolation Forest (`iforest`) - currently disabled"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# iforest = create_model('iforest')\n",
"# iforest_anomalies = assign_model(iforest)\n",
"# iso_df=embeding_df.drop(['tweet'], axis=1)\n",
"# iforest_pred = predict_model(iforest, data=iso_df)\n",
"# iforest_pred"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# iforest_pred['Anomaly'].value_counts()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# K-Nearest Neighbors (`knn`) anomaly detection\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
" ... | \n",
" 57 | \n",
" 58 | \n",
" 59 | \n",
" 60 | \n",
" 61 | \n",
" 62 | \n",
" 63 | \n",
" labels | \n",
" Anomaly | \n",
" Anomaly_Score | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 101.0 | \n",
" 10110.0 | \n",
" 175.0 | \n",
" 78653.0 | \n",
" 189.0 | \n",
" 25285.0 | \n",
" 15976.0 | \n",
" 40840.0 | \n",
" 276.0 | \n",
" 31623.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 70971.171936 | \n",
"
\n",
" \n",
" 1 | \n",
" 101.0 | \n",
" 11589.0 | \n",
" 10706.0 | \n",
" 10713.0 | \n",
" 10794.0 | \n",
" 94698.0 | \n",
" 30668.0 | \n",
" 24883.0 | \n",
" 117.0 | \n",
" 23763.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 77147.550363 | \n",
"
\n",
" \n",
" 2 | \n",
" 101.0 | \n",
" 148.0 | \n",
" 30471.0 | \n",
" 10774.0 | \n",
" 13785.0 | \n",
" 13779.0 | \n",
" 33642.0 | \n",
" 14399.0 | \n",
" 48271.0 | \n",
" 76686.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 118676.465801 | \n",
"
\n",
" \n",
" 3 | \n",
" 101.0 | \n",
" 19319.0 | \n",
" 16724.0 | \n",
" 10118.0 | \n",
" 10107.0 | \n",
" 78323.0 | \n",
" 12407.0 | \n",
" 38959.0 | \n",
" 22934.0 | \n",
" 10147.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 94310.765409 | \n",
"
\n",
" \n",
" 4 | \n",
" 101.0 | \n",
" 30932.0 | \n",
" 58706.0 | \n",
" 58054.0 | \n",
" 44907.0 | \n",
" 10224.0 | \n",
" 106583.0 | \n",
" 10288.0 | \n",
" 12524.0 | \n",
" 13878.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 63569.489655 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 43344 | \n",
" 101.0 | \n",
" 20065.0 | \n",
" 10161.0 | \n",
" 115.0 | \n",
" 115.0 | \n",
" 103784.0 | \n",
" 10774.0 | \n",
" 21388.0 | \n",
" 10245.0 | \n",
" 92067.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 183310.474995 | \n",
"
\n",
" \n",
" 43345 | \n",
" 101.0 | \n",
" 139.0 | \n",
" 80839.0 | \n",
" 24109.0 | \n",
" 13406.0 | \n",
" 18985.0 | \n",
" 16285.0 | \n",
" 10163.0 | \n",
" 11062.0 | \n",
" 276.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 140717.435036 | \n",
"
\n",
" \n",
" 43346 | \n",
" 101.0 | \n",
" 105549.0 | \n",
" 102635.0 | \n",
" 10140.0 | \n",
" 26943.0 | \n",
" 11499.0 | \n",
" 110516.0 | \n",
" 21899.0 | \n",
" 11861.0 | \n",
" 10561.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 98954.428628 | \n",
"
\n",
" \n",
" 43347 | \n",
" 101.0 | \n",
" 81424.0 | \n",
" 26398.0 | \n",
" 92017.0 | \n",
" 109620.0 | \n",
" 10941.0 | \n",
" 76010.0 | \n",
" 10115.0 | \n",
" 19830.0 | \n",
" 26083.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 65424.117159 | \n",
"
\n",
" \n",
" 43348 | \n",
" 101.0 | \n",
" 39774.0 | \n",
" 11127.0 | \n",
" 45989.0 | \n",
" 24596.0 | \n",
" 11933.0 | \n",
" 170.0 | \n",
" 17145.0 | \n",
" 10710.0 | \n",
" 39125.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 182332.274049 | \n",
"
\n",
" \n",
"
\n",
"
43349 rows × 67 columns
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 \\\n",
"0 101.0 10110.0 175.0 78653.0 189.0 25285.0 15976.0 \n",
"1 101.0 11589.0 10706.0 10713.0 10794.0 94698.0 30668.0 \n",
"2 101.0 148.0 30471.0 10774.0 13785.0 13779.0 33642.0 \n",
"3 101.0 19319.0 16724.0 10118.0 10107.0 78323.0 12407.0 \n",
"4 101.0 30932.0 58706.0 58054.0 44907.0 10224.0 106583.0 \n",
"... ... ... ... ... ... ... ... \n",
"43344 101.0 20065.0 10161.0 115.0 115.0 103784.0 10774.0 \n",
"43345 101.0 139.0 80839.0 24109.0 13406.0 18985.0 16285.0 \n",
"43346 101.0 105549.0 102635.0 10140.0 26943.0 11499.0 110516.0 \n",
"43347 101.0 81424.0 26398.0 92017.0 109620.0 10941.0 76010.0 \n",
"43348 101.0 39774.0 11127.0 45989.0 24596.0 11933.0 170.0 \n",
"\n",
" 7 8 9 ... 57 58 59 60 61 62 63 \\\n",
"0 40840.0 276.0 31623.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"1 24883.0 117.0 23763.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2 14399.0 48271.0 76686.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"3 38959.0 22934.0 10147.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"4 10288.0 12524.0 13878.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... ... ... ... ... ... ... ... \n",
"43344 21388.0 10245.0 92067.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43345 10163.0 11062.0 276.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43346 21899.0 11861.0 10561.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43347 10115.0 19830.0 26083.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43348 17145.0 10710.0 39125.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"\n",
" labels Anomaly Anomaly_Score \n",
"0 0.0 0 70971.171936 \n",
"1 0.0 0 77147.550363 \n",
"2 0.0 0 118676.465801 \n",
"3 0.0 0 94310.765409 \n",
"4 0.0 0 63569.489655 \n",
"... ... ... ... \n",
"43344 1.0 0 183310.474995 \n",
"43345 1.0 0 140717.435036 \n",
"43346 1.0 0 98954.428628 \n",
"43347 1.0 0 65424.117159 \n",
"43348 1.0 0 182332.274049 \n",
"\n",
"[43349 rows x 67 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Frame handed to predict_model: the encoded table without the raw text.\n",
"knn_df = embeding_df.drop(columns=['tweet'])\n",
"\n",
"# Fit the KNN detector on the data registered with setup().\n",
"knn = create_model('knn')\n",
"# Anomaly assignments for the data the detector was trained on.\n",
"knn_anomalies = assign_model(knn)\n",
"\n",
"# Score knn_df with the fitted detector; the result carries the extra\n",
"# 'Anomaly' and 'Anomaly_Score' columns and is shown as the cell output.\n",
"knn_pred = predict_model(knn, data=knn_df)\n",
"knn_pred"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 41376\n",
"1 1973\n",
"Name: Anomaly, dtype: int64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"knn_pred['Anomaly'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
" ... | \n",
" 57 | \n",
" 58 | \n",
" 59 | \n",
" 60 | \n",
" 61 | \n",
" 62 | \n",
" 63 | \n",
" labels | \n",
" Anomaly | \n",
" Anomaly_Score | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 101.0 | \n",
" 10110.0 | \n",
" 175.0 | \n",
" 78653.0 | \n",
" 189.0 | \n",
" 25285.0 | \n",
" 15976.0 | \n",
" 40840.0 | \n",
" 276.0 | \n",
" 31623.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 70971.171936 | \n",
"
\n",
" \n",
" 1 | \n",
" 101.0 | \n",
" 11589.0 | \n",
" 10706.0 | \n",
" 10713.0 | \n",
" 10794.0 | \n",
" 94698.0 | \n",
" 30668.0 | \n",
" 24883.0 | \n",
" 117.0 | \n",
" 23763.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 77147.550363 | \n",
"
\n",
" \n",
" 2 | \n",
" 101.0 | \n",
" 148.0 | \n",
" 30471.0 | \n",
" 10774.0 | \n",
" 13785.0 | \n",
" 13779.0 | \n",
" 33642.0 | \n",
" 14399.0 | \n",
" 48271.0 | \n",
" 76686.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 118676.465801 | \n",
"
\n",
" \n",
" 3 | \n",
" 101.0 | \n",
" 19319.0 | \n",
" 16724.0 | \n",
" 10118.0 | \n",
" 10107.0 | \n",
" 78323.0 | \n",
" 12407.0 | \n",
" 38959.0 | \n",
" 22934.0 | \n",
" 10147.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 94310.765409 | \n",
"
\n",
" \n",
" 4 | \n",
" 101.0 | \n",
" 30932.0 | \n",
" 58706.0 | \n",
" 58054.0 | \n",
" 44907.0 | \n",
" 10224.0 | \n",
" 106583.0 | \n",
" 10288.0 | \n",
" 12524.0 | \n",
" 13878.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 63569.489655 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 43344 | \n",
" 101.0 | \n",
" 20065.0 | \n",
" 10161.0 | \n",
" 115.0 | \n",
" 115.0 | \n",
" 103784.0 | \n",
" 10774.0 | \n",
" 21388.0 | \n",
" 10245.0 | \n",
" 92067.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 183310.474995 | \n",
"
\n",
" \n",
" 43345 | \n",
" 101.0 | \n",
" 139.0 | \n",
" 80839.0 | \n",
" 24109.0 | \n",
" 13406.0 | \n",
" 18985.0 | \n",
" 16285.0 | \n",
" 10163.0 | \n",
" 11062.0 | \n",
" 276.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 140717.435036 | \n",
"
\n",
" \n",
" 43346 | \n",
" 101.0 | \n",
" 105549.0 | \n",
" 102635.0 | \n",
" 10140.0 | \n",
" 26943.0 | \n",
" 11499.0 | \n",
" 110516.0 | \n",
" 21899.0 | \n",
" 11861.0 | \n",
" 10561.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 98954.428628 | \n",
"
\n",
" \n",
" 43347 | \n",
" 101.0 | \n",
" 81424.0 | \n",
" 26398.0 | \n",
" 92017.0 | \n",
" 109620.0 | \n",
" 10941.0 | \n",
" 76010.0 | \n",
" 10115.0 | \n",
" 19830.0 | \n",
" 26083.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 65424.117159 | \n",
"
\n",
" \n",
" 43348 | \n",
" 101.0 | \n",
" 39774.0 | \n",
" 11127.0 | \n",
" 45989.0 | \n",
" 24596.0 | \n",
" 11933.0 | \n",
" 170.0 | \n",
" 17145.0 | \n",
" 10710.0 | \n",
" 39125.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 182332.274049 | \n",
"
\n",
" \n",
"
\n",
"
43349 rows × 67 columns
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 \\\n",
"0 101.0 10110.0 175.0 78653.0 189.0 25285.0 15976.0 \n",
"1 101.0 11589.0 10706.0 10713.0 10794.0 94698.0 30668.0 \n",
"2 101.0 148.0 30471.0 10774.0 13785.0 13779.0 33642.0 \n",
"3 101.0 19319.0 16724.0 10118.0 10107.0 78323.0 12407.0 \n",
"4 101.0 30932.0 58706.0 58054.0 44907.0 10224.0 106583.0 \n",
"... ... ... ... ... ... ... ... \n",
"43344 101.0 20065.0 10161.0 115.0 115.0 103784.0 10774.0 \n",
"43345 101.0 139.0 80839.0 24109.0 13406.0 18985.0 16285.0 \n",
"43346 101.0 105549.0 102635.0 10140.0 26943.0 11499.0 110516.0 \n",
"43347 101.0 81424.0 26398.0 92017.0 109620.0 10941.0 76010.0 \n",
"43348 101.0 39774.0 11127.0 45989.0 24596.0 11933.0 170.0 \n",
"\n",
" 7 8 9 ... 57 58 59 60 61 62 63 \\\n",
"0 40840.0 276.0 31623.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"1 24883.0 117.0 23763.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2 14399.0 48271.0 76686.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"3 38959.0 22934.0 10147.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"4 10288.0 12524.0 13878.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... ... ... ... ... ... ... ... \n",
"43344 21388.0 10245.0 92067.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43345 10163.0 11062.0 276.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43346 21899.0 11861.0 10561.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43347 10115.0 19830.0 26083.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43348 17145.0 10710.0 39125.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"\n",
" labels Anomaly Anomaly_Score \n",
"0 0.0 0 70971.171936 \n",
"1 0.0 0 77147.550363 \n",
"2 0.0 0 118676.465801 \n",
"3 0.0 0 94310.765409 \n",
"4 0.0 0 63569.489655 \n",
"... ... ... ... \n",
"43344 1.0 0 183310.474995 \n",
"43345 1.0 0 140717.435036 \n",
"43346 1.0 0 98954.428628 \n",
"43347 1.0 0 65424.117159 \n",
"43348 1.0 0 182332.274049 \n",
"\n",
"[43349 rows x 67 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"knn_pred"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 41376\n",
"1 1973\n",
"Name: Anomaly, dtype: int64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"knn_pred['Anomaly'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
" ... | \n",
" 56 | \n",
" 57 | \n",
" 58 | \n",
" 59 | \n",
" 60 | \n",
" 61 | \n",
" 62 | \n",
" 63 | \n",
" labels | \n",
" tweet | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 101 | \n",
" 10110 | \n",
" 175 | \n",
" 78653 | \n",
" 189 | \n",
" 25285 | \n",
" 15976 | \n",
" 40840 | \n",
" 276 | \n",
" 31623 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" en güzel uyuyan insan ödülü jeon jungkook'a g... | \n",
"
\n",
" \n",
" 1 | \n",
" 101 | \n",
" 11589 | \n",
" 10706 | \n",
" 10713 | \n",
" 10794 | \n",
" 94698 | \n",
" 30668 | \n",
" 24883 | \n",
" 117 | \n",
" 23763 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" Mekanı cennet olsun, saygılar sayın avukatımı... | \n",
"
\n",
" \n",
" 2 | \n",
" 101 | \n",
" 148 | \n",
" 30471 | \n",
" 10774 | \n",
" 13785 | \n",
" 13779 | \n",
" 33642 | \n",
" 14399 | \n",
" 48271 | \n",
" 76686 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" Kızlar aranızda kas yığını beylere düşenler ol... | \n",
"
\n",
" \n",
" 3 | \n",
" 101 | \n",
" 19319 | \n",
" 16724 | \n",
" 10118 | \n",
" 10107 | \n",
" 78323 | \n",
" 12407 | \n",
" 38959 | \n",
" 22934 | \n",
" 10147 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" Biraz ders çalışayım. Tembellik ve uyku düşman... | \n",
"
\n",
" \n",
" 4 | \n",
" 101 | \n",
" 30932 | \n",
" 58706 | \n",
" 58054 | \n",
" 44907 | \n",
" 10224 | \n",
" 106583 | \n",
" 10288 | \n",
" 12524 | \n",
" 13878 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" Trezeguet yerine El Sharawy daha iyi olmaz mı | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 43344 | \n",
" 101 | \n",
" 20065 | \n",
" 10161 | \n",
" 115 | \n",
" 115 | \n",
" 103784 | \n",
" 10774 | \n",
" 21388 | \n",
" 10245 | \n",
" 92067 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" Hil**adamlar kesinlikle kelimeleri anlamıyorla... | \n",
"
\n",
" \n",
" 43345 | \n",
" 101 | \n",
" 139 | \n",
" 80839 | \n",
" 24109 | \n",
" 13406 | \n",
" 18985 | \n",
" 16285 | \n",
" 10163 | \n",
" 11062 | \n",
" 276 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" Böyle piçlerin çok erken ölmemelerini ve çok f... | \n",
"
\n",
" \n",
" 43346 | \n",
" 101 | \n",
" 105549 | \n",
" 102635 | \n",
" 10140 | \n",
" 26943 | \n",
" 11499 | \n",
" 110516 | \n",
" 21899 | \n",
" 11861 | \n",
" 10561 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" Turgay denilen bu holigonda bir sorun yok, gur... | \n",
"
\n",
" \n",
" 43347 | \n",
" 101 | \n",
" 81424 | \n",
" 26398 | \n",
" 92017 | \n",
" 109620 | \n",
" 10941 | \n",
" 76010 | \n",
" 10115 | \n",
" 19830 | \n",
" 26083 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" Umarım ülkenin düşük zekadan kurtulması ilgile... | \n",
"
\n",
" \n",
" 43348 | \n",
" 101 | \n",
" 39774 | \n",
" 11127 | \n",
" 45989 | \n",
" 24596 | \n",
" 11933 | \n",
" 170 | \n",
" 17145 | \n",
" 10710 | \n",
" 39125 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" CHP sandıkları bırakmaz, üzerine oturur, bir c... | \n",
"
\n",
" \n",
"
\n",
"
43349 rows × 66 columns
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 8 \\\n",
"0 101 10110 175 78653 189 25285 15976 40840 276 \n",
"1 101 11589 10706 10713 10794 94698 30668 24883 117 \n",
"2 101 148 30471 10774 13785 13779 33642 14399 48271 \n",
"3 101 19319 16724 10118 10107 78323 12407 38959 22934 \n",
"4 101 30932 58706 58054 44907 10224 106583 10288 12524 \n",
"... ... ... ... ... ... ... ... ... ... \n",
"43344 101 20065 10161 115 115 103784 10774 21388 10245 \n",
"43345 101 139 80839 24109 13406 18985 16285 10163 11062 \n",
"43346 101 105549 102635 10140 26943 11499 110516 21899 11861 \n",
"43347 101 81424 26398 92017 109620 10941 76010 10115 19830 \n",
"43348 101 39774 11127 45989 24596 11933 170 17145 10710 \n",
"\n",
" 9 ... 56 57 58 59 60 61 62 63 labels \\\n",
"0 31623 ... 0 0 0 0 0 0 0 0 0 \n",
"1 23763 ... 0 0 0 0 0 0 0 0 0 \n",
"2 76686 ... 0 0 0 0 0 0 0 0 0 \n",
"3 10147 ... 0 0 0 0 0 0 0 0 0 \n",
"4 13878 ... 0 0 0 0 0 0 0 0 0 \n",
"... ... ... .. .. .. .. .. .. .. .. ... \n",
"43344 92067 ... 0 0 0 0 0 0 0 0 1 \n",
"43345 276 ... 0 0 0 0 0 0 0 0 1 \n",
"43346 10561 ... 0 0 0 0 0 0 0 0 1 \n",
"43347 26083 ... 0 0 0 0 0 0 0 0 1 \n",
"43348 39125 ... 0 0 0 0 0 0 0 0 1 \n",
"\n",
" tweet \n",
"0 en güzel uyuyan insan ödülü jeon jungkook'a g... \n",
"1 Mekanı cennet olsun, saygılar sayın avukatımı... \n",
"2 Kızlar aranızda kas yığını beylere düşenler ol... \n",
"3 Biraz ders çalışayım. Tembellik ve uyku düşman... \n",
"4 Trezeguet yerine El Sharawy daha iyi olmaz mı \n",
"... ... \n",
"43344 Hil**adamlar kesinlikle kelimeleri anlamıyorla... \n",
"43345 Böyle piçlerin çok erken ölmemelerini ve çok f... \n",
"43346 Turgay denilen bu holigonda bir sorun yok, gur... \n",
"43347 Umarım ülkenin düşük zekadan kurtulması ilgile... \n",
"43348 CHP sandıkları bırakmaz, üzerine oturur, bir c... \n",
"\n",
"[43349 rows x 66 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"embeding_df"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Remove every row the KNN detector flagged (Anomaly == 1).\n",
"# Assumes knn_pred still carries embeding_df's index labels - it was\n",
"# predicted from embeding_df minus the 'tweet' column; confirm if the\n",
"# index is ever reset in between.\n",
"# The filter rebinds embeding_df instead of dropping in place, so running\n",
"# this cell again is a no-op rather than a KeyError on labels that were\n",
"# already removed.\n",
"anomaly_index = knn_pred.loc[knn_pred['Anomaly'] == 1].index\n",
"embeding_df = embeding_df.loc[~embeding_df.index.isin(anomaly_index)]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
" ... | \n",
" 56 | \n",
" 57 | \n",
" 58 | \n",
" 59 | \n",
" 60 | \n",
" 61 | \n",
" 62 | \n",
" 63 | \n",
" labels | \n",
" tweet | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 101 | \n",
" 10110 | \n",
" 175 | \n",
" 78653 | \n",
" 189 | \n",
" 25285 | \n",
" 15976 | \n",
" 40840 | \n",
" 276 | \n",
" 31623 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" en güzel uyuyan insan ödülü jeon jungkook'a g... | \n",
"
\n",
" \n",
" 1 | \n",
" 101 | \n",
" 11589 | \n",
" 10706 | \n",
" 10713 | \n",
" 10794 | \n",
" 94698 | \n",
" 30668 | \n",
" 24883 | \n",
" 117 | \n",
" 23763 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" Mekanı cennet olsun, saygılar sayın avukatımı... | \n",
"
\n",
" \n",
" 2 | \n",
" 101 | \n",
" 148 | \n",
" 30471 | \n",
" 10774 | \n",
" 13785 | \n",
" 13779 | \n",
" 33642 | \n",
" 14399 | \n",
" 48271 | \n",
" 76686 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" Kızlar aranızda kas yığını beylere düşenler ol... | \n",
"
\n",
" \n",
" 3 | \n",
" 101 | \n",
" 19319 | \n",
" 16724 | \n",
" 10118 | \n",
" 10107 | \n",
" 78323 | \n",
" 12407 | \n",
" 38959 | \n",
" 22934 | \n",
" 10147 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" Biraz ders çalışayım. Tembellik ve uyku düşman... | \n",
"
\n",
" \n",
" 4 | \n",
" 101 | \n",
" 30932 | \n",
" 58706 | \n",
" 58054 | \n",
" 44907 | \n",
" 10224 | \n",
" 106583 | \n",
" 10288 | \n",
" 12524 | \n",
" 13878 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" Trezeguet yerine El Sharawy daha iyi olmaz mı | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 43344 | \n",
" 101 | \n",
" 20065 | \n",
" 10161 | \n",
" 115 | \n",
" 115 | \n",
" 103784 | \n",
" 10774 | \n",
" 21388 | \n",
" 10245 | \n",
" 92067 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" Hil**adamlar kesinlikle kelimeleri anlamıyorla... | \n",
"
\n",
" \n",
" 43345 | \n",
" 101 | \n",
" 139 | \n",
" 80839 | \n",
" 24109 | \n",
" 13406 | \n",
" 18985 | \n",
" 16285 | \n",
" 10163 | \n",
" 11062 | \n",
" 276 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" Böyle piçlerin çok erken ölmemelerini ve çok f... | \n",
"
\n",
" \n",
" 43346 | \n",
" 101 | \n",
" 105549 | \n",
" 102635 | \n",
" 10140 | \n",
" 26943 | \n",
" 11499 | \n",
" 110516 | \n",
" 21899 | \n",
" 11861 | \n",
" 10561 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" Turgay denilen bu holigonda bir sorun yok, gur... | \n",
"
\n",
" \n",
" 43347 | \n",
" 101 | \n",
" 81424 | \n",
" 26398 | \n",
" 92017 | \n",
" 109620 | \n",
" 10941 | \n",
" 76010 | \n",
" 10115 | \n",
" 19830 | \n",
" 26083 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" Umarım ülkenin düşük zekadan kurtulması ilgile... | \n",
"
\n",
" \n",
" 43348 | \n",
" 101 | \n",
" 39774 | \n",
" 11127 | \n",
" 45989 | \n",
" 24596 | \n",
" 11933 | \n",
" 170 | \n",
" 17145 | \n",
" 10710 | \n",
" 39125 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" CHP sandıkları bırakmaz, üzerine oturur, bir c... | \n",
"
\n",
" \n",
"
\n",
"
41376 rows × 66 columns
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 8 \\\n",
"0 101 10110 175 78653 189 25285 15976 40840 276 \n",
"1 101 11589 10706 10713 10794 94698 30668 24883 117 \n",
"2 101 148 30471 10774 13785 13779 33642 14399 48271 \n",
"3 101 19319 16724 10118 10107 78323 12407 38959 22934 \n",
"4 101 30932 58706 58054 44907 10224 106583 10288 12524 \n",
"... ... ... ... ... ... ... ... ... ... \n",
"43344 101 20065 10161 115 115 103784 10774 21388 10245 \n",
"43345 101 139 80839 24109 13406 18985 16285 10163 11062 \n",
"43346 101 105549 102635 10140 26943 11499 110516 21899 11861 \n",
"43347 101 81424 26398 92017 109620 10941 76010 10115 19830 \n",
"43348 101 39774 11127 45989 24596 11933 170 17145 10710 \n",
"\n",
" 9 ... 56 57 58 59 60 61 62 63 labels \\\n",
"0 31623 ... 0 0 0 0 0 0 0 0 0 \n",
"1 23763 ... 0 0 0 0 0 0 0 0 0 \n",
"2 76686 ... 0 0 0 0 0 0 0 0 0 \n",
"3 10147 ... 0 0 0 0 0 0 0 0 0 \n",
"4 13878 ... 0 0 0 0 0 0 0 0 0 \n",
"... ... ... .. .. .. .. .. .. .. .. ... \n",
"43344 92067 ... 0 0 0 0 0 0 0 0 1 \n",
"43345 276 ... 0 0 0 0 0 0 0 0 1 \n",
"43346 10561 ... 0 0 0 0 0 0 0 0 1 \n",
"43347 26083 ... 0 0 0 0 0 0 0 0 1 \n",
"43348 39125 ... 0 0 0 0 0 0 0 0 1 \n",
"\n",
" tweet \n",
"0 en güzel uyuyan insan ödülü jeon jungkook'a g... \n",
"1 Mekanı cennet olsun, saygılar sayın avukatımı... \n",
"2 Kızlar aranızda kas yığını beylere düşenler ol... \n",
"3 Biraz ders çalışayım. Tembellik ve uyku düşman... \n",
"4 Trezeguet yerine El Sharawy daha iyi olmaz mı \n",
"... ... \n",
"43344 Hil**adamlar kesinlikle kelimeleri anlamıyorla... \n",
"43345 Böyle piçlerin çok erken ölmemelerini ve çok f... \n",
"43346 Turgay denilen bu holigonda bir sorun yok, gur... \n",
"43347 Umarım ülkenin düşük zekadan kurtulması ilgile... \n",
"43348 CHP sandıkları bırakmaz, üzerine oturur, bir c... \n",
"\n",
"[41376 rows x 66 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"embeding_df"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"df=pd.DataFrame()\n",
"df['tweet']=embeding_df['tweet']\n",
"df['subtas_a']=embeding_df['labels']"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('knn_outliers.csv') "
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# pca"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
" ... | \n",
" 57 | \n",
" 58 | \n",
" 59 | \n",
" 60 | \n",
" 61 | \n",
" 62 | \n",
" 63 | \n",
" labels | \n",
" Anomaly | \n",
" Anomaly_Score | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 101.0 | \n",
" 10110.0 | \n",
" 175.0 | \n",
" 78653.0 | \n",
" 189.0 | \n",
" 25285.0 | \n",
" 15976.0 | \n",
" 40840.0 | \n",
" 276.0 | \n",
" 31623.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 1.354399e+32 | \n",
"
\n",
" \n",
" 1 | \n",
" 101.0 | \n",
" 11589.0 | \n",
" 10706.0 | \n",
" 10713.0 | \n",
" 10794.0 | \n",
" 94698.0 | \n",
" 30668.0 | \n",
" 24883.0 | \n",
" 117.0 | \n",
" 23763.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 1.311723e+32 | \n",
"
\n",
" \n",
" 2 | \n",
" 101.0 | \n",
" 148.0 | \n",
" 30471.0 | \n",
" 10774.0 | \n",
" 13785.0 | \n",
" 13779.0 | \n",
" 33642.0 | \n",
" 14399.0 | \n",
" 48271.0 | \n",
" 76686.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 1.597792e+32 | \n",
"
\n",
" \n",
" 3 | \n",
" 101.0 | \n",
" 19319.0 | \n",
" 16724.0 | \n",
" 10118.0 | \n",
" 10107.0 | \n",
" 78323.0 | \n",
" 12407.0 | \n",
" 38959.0 | \n",
" 22934.0 | \n",
" 10147.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 1.551488e+32 | \n",
"
\n",
" \n",
" 4 | \n",
" 101.0 | \n",
" 30932.0 | \n",
" 58706.0 | \n",
" 58054.0 | \n",
" 44907.0 | \n",
" 10224.0 | \n",
" 106583.0 | \n",
" 10288.0 | \n",
" 12524.0 | \n",
" 13878.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 1.348867e+32 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 43344 | \n",
" 101.0 | \n",
" 20065.0 | \n",
" 10161.0 | \n",
" 115.0 | \n",
" 115.0 | \n",
" 103784.0 | \n",
" 10774.0 | \n",
" 21388.0 | \n",
" 10245.0 | \n",
" 92067.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 2.346619e+32 | \n",
"
\n",
" \n",
" 43345 | \n",
" 101.0 | \n",
" 139.0 | \n",
" 80839.0 | \n",
" 24109.0 | \n",
" 13406.0 | \n",
" 18985.0 | \n",
" 16285.0 | \n",
" 10163.0 | \n",
" 11062.0 | \n",
" 276.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 1.778253e+32 | \n",
"
\n",
" \n",
" 43346 | \n",
" 101.0 | \n",
" 105549.0 | \n",
" 102635.0 | \n",
" 10140.0 | \n",
" 26943.0 | \n",
" 11499.0 | \n",
" 110516.0 | \n",
" 21899.0 | \n",
" 11861.0 | \n",
" 10561.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 1.762300e+32 | \n",
"
\n",
" \n",
" 43347 | \n",
" 101.0 | \n",
" 81424.0 | \n",
" 26398.0 | \n",
" 92017.0 | \n",
" 109620.0 | \n",
" 10941.0 | \n",
" 76010.0 | \n",
" 10115.0 | \n",
" 19830.0 | \n",
" 26083.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 1.564075e+32 | \n",
"
\n",
" \n",
" 43348 | \n",
" 101.0 | \n",
" 39774.0 | \n",
" 11127.0 | \n",
" 45989.0 | \n",
" 24596.0 | \n",
" 11933.0 | \n",
" 170.0 | \n",
" 17145.0 | \n",
" 10710.0 | \n",
" 39125.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 2.685411e+32 | \n",
"
\n",
" \n",
"
\n",
"
43349 rows × 67 columns
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 \\\n",
"0 101.0 10110.0 175.0 78653.0 189.0 25285.0 15976.0 \n",
"1 101.0 11589.0 10706.0 10713.0 10794.0 94698.0 30668.0 \n",
"2 101.0 148.0 30471.0 10774.0 13785.0 13779.0 33642.0 \n",
"3 101.0 19319.0 16724.0 10118.0 10107.0 78323.0 12407.0 \n",
"4 101.0 30932.0 58706.0 58054.0 44907.0 10224.0 106583.0 \n",
"... ... ... ... ... ... ... ... \n",
"43344 101.0 20065.0 10161.0 115.0 115.0 103784.0 10774.0 \n",
"43345 101.0 139.0 80839.0 24109.0 13406.0 18985.0 16285.0 \n",
"43346 101.0 105549.0 102635.0 10140.0 26943.0 11499.0 110516.0 \n",
"43347 101.0 81424.0 26398.0 92017.0 109620.0 10941.0 76010.0 \n",
"43348 101.0 39774.0 11127.0 45989.0 24596.0 11933.0 170.0 \n",
"\n",
" 7 8 9 ... 57 58 59 60 61 62 63 \\\n",
"0 40840.0 276.0 31623.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"1 24883.0 117.0 23763.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2 14399.0 48271.0 76686.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"3 38959.0 22934.0 10147.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"4 10288.0 12524.0 13878.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... ... ... ... ... ... ... ... \n",
"43344 21388.0 10245.0 92067.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43345 10163.0 11062.0 276.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43346 21899.0 11861.0 10561.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43347 10115.0 19830.0 26083.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43348 17145.0 10710.0 39125.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"\n",
" labels Anomaly Anomaly_Score \n",
"0 0.0 0 1.354399e+32 \n",
"1 0.0 0 1.311723e+32 \n",
"2 0.0 0 1.597792e+32 \n",
"3 0.0 0 1.551488e+32 \n",
"4 0.0 0 1.348867e+32 \n",
"... ... ... ... \n",
"43344 1.0 0 2.346619e+32 \n",
"43345 1.0 0 1.778253e+32 \n",
"43346 1.0 0 1.762300e+32 \n",
"43347 1.0 0 1.564075e+32 \n",
"43348 1.0 0 2.685411e+32 \n",
"\n",
"[43349 rows x 67 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pca = create_model('pca')\n",
"pca_anomalies = assign_model(pca)\n",
"pca_df=embeding_df.drop(['tweet'], axis=1)\n",
"pca_pred = predict_model(pca, data=pca_df)\n",
"pca_pred"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 41181\n",
"1 2168\n",
"Name: Anomaly, dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pca_pred['Anomaly'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"embeding_df.drop(pca_pred.loc[pca_pred['Anomaly']==1 ].index, inplace=True)\n",
"df=pd.DataFrame()\n",
"df['tweet']=embeding_df['tweet']\n",
"df['subtas_a']=embeding_df['labels']\n",
"df.to_csv('pca_outliers.csv') "
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# abod"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
" ... | \n",
" 57 | \n",
" 58 | \n",
" 59 | \n",
" 60 | \n",
" 61 | \n",
" 62 | \n",
" 63 | \n",
" labels | \n",
" Anomaly | \n",
" Anomaly_Score | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 101.0 | \n",
" 10110.0 | \n",
" 175.0 | \n",
" 78653.0 | \n",
" 189.0 | \n",
" 25285.0 | \n",
" 15976.0 | \n",
" 40840.0 | \n",
" 276.0 | \n",
" 31623.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" -7.719921e-22 | \n",
"
\n",
" \n",
" 1 | \n",
" 101.0 | \n",
" 11589.0 | \n",
" 10706.0 | \n",
" 10713.0 | \n",
" 10794.0 | \n",
" 94698.0 | \n",
" 30668.0 | \n",
" 24883.0 | \n",
" 117.0 | \n",
" 23763.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" -4.030618e-21 | \n",
"
\n",
" \n",
" 2 | \n",
" 101.0 | \n",
" 148.0 | \n",
" 30471.0 | \n",
" 10774.0 | \n",
" 13785.0 | \n",
" 13779.0 | \n",
" 33642.0 | \n",
" 14399.0 | \n",
" 48271.0 | \n",
" 76686.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" -3.558939e-22 | \n",
"
\n",
" \n",
" 3 | \n",
" 101.0 | \n",
" 19319.0 | \n",
" 16724.0 | \n",
" 10118.0 | \n",
" 10107.0 | \n",
" 78323.0 | \n",
" 12407.0 | \n",
" 38959.0 | \n",
" 22934.0 | \n",
" 10147.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" -2.895136e-22 | \n",
"
\n",
" \n",
" 4 | \n",
" 101.0 | \n",
" 30932.0 | \n",
" 58706.0 | \n",
" 58054.0 | \n",
" 44907.0 | \n",
" 10224.0 | \n",
" 106583.0 | \n",
" 10288.0 | \n",
" 12524.0 | \n",
" 13878.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" -4.832515e-21 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 43344 | \n",
" 101.0 | \n",
" 20065.0 | \n",
" 10161.0 | \n",
" 115.0 | \n",
" 115.0 | \n",
" 103784.0 | \n",
" 10774.0 | \n",
" 21388.0 | \n",
" 10245.0 | \n",
" 92067.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" -7.984637e-25 | \n",
"
\n",
" \n",
" 43345 | \n",
" 101.0 | \n",
" 139.0 | \n",
" 80839.0 | \n",
" 24109.0 | \n",
" 13406.0 | \n",
" 18985.0 | \n",
" 16285.0 | \n",
" 10163.0 | \n",
" 11062.0 | \n",
" 276.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" -1.059387e-22 | \n",
"
\n",
" \n",
" 43346 | \n",
" 101.0 | \n",
" 105549.0 | \n",
" 102635.0 | \n",
" 10140.0 | \n",
" 26943.0 | \n",
" 11499.0 | \n",
" 110516.0 | \n",
" 21899.0 | \n",
" 11861.0 | \n",
" 10561.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" -3.592603e-22 | \n",
"
\n",
" \n",
" 43347 | \n",
" 101.0 | \n",
" 81424.0 | \n",
" 26398.0 | \n",
" 92017.0 | \n",
" 109620.0 | \n",
" 10941.0 | \n",
" 76010.0 | \n",
" 10115.0 | \n",
" 19830.0 | \n",
" 26083.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" -2.226002e-21 | \n",
"
\n",
" \n",
" 43348 | \n",
" 101.0 | \n",
" 39774.0 | \n",
" 11127.0 | \n",
" 45989.0 | \n",
" 24596.0 | \n",
" 11933.0 | \n",
" 170.0 | \n",
" 17145.0 | \n",
" 10710.0 | \n",
" 39125.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" -2.864757e-23 | \n",
"
\n",
" \n",
"
\n",
"
43349 rows × 67 columns
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 \\\n",
"0 101.0 10110.0 175.0 78653.0 189.0 25285.0 15976.0 \n",
"1 101.0 11589.0 10706.0 10713.0 10794.0 94698.0 30668.0 \n",
"2 101.0 148.0 30471.0 10774.0 13785.0 13779.0 33642.0 \n",
"3 101.0 19319.0 16724.0 10118.0 10107.0 78323.0 12407.0 \n",
"4 101.0 30932.0 58706.0 58054.0 44907.0 10224.0 106583.0 \n",
"... ... ... ... ... ... ... ... \n",
"43344 101.0 20065.0 10161.0 115.0 115.0 103784.0 10774.0 \n",
"43345 101.0 139.0 80839.0 24109.0 13406.0 18985.0 16285.0 \n",
"43346 101.0 105549.0 102635.0 10140.0 26943.0 11499.0 110516.0 \n",
"43347 101.0 81424.0 26398.0 92017.0 109620.0 10941.0 76010.0 \n",
"43348 101.0 39774.0 11127.0 45989.0 24596.0 11933.0 170.0 \n",
"\n",
" 7 8 9 ... 57 58 59 60 61 62 63 \\\n",
"0 40840.0 276.0 31623.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"1 24883.0 117.0 23763.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2 14399.0 48271.0 76686.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"3 38959.0 22934.0 10147.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"4 10288.0 12524.0 13878.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... ... ... ... ... ... ... ... \n",
"43344 21388.0 10245.0 92067.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43345 10163.0 11062.0 276.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43346 21899.0 11861.0 10561.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43347 10115.0 19830.0 26083.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43348 17145.0 10710.0 39125.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"\n",
" labels Anomaly Anomaly_Score \n",
"0 0.0 0 -7.719921e-22 \n",
"1 0.0 0 -4.030618e-21 \n",
"2 0.0 0 -3.558939e-22 \n",
"3 0.0 0 -2.895136e-22 \n",
"4 0.0 0 -4.832515e-21 \n",
"... ... ... ... \n",
"43344 1.0 0 -7.984637e-25 \n",
"43345 1.0 0 -1.059387e-22 \n",
"43346 1.0 0 -3.592603e-22 \n",
"43347 1.0 0 -2.226002e-21 \n",
"43348 1.0 0 -2.864757e-23 \n",
"\n",
"[43349 rows x 67 columns]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"abod = create_model('abod')\n",
"abod_anomalies = assign_model(abod)\n",
"abod_df=embeding_df.drop(['tweet'], axis=1)\n",
"abod_pred = predict_model(abod, data=abod_df)\n",
"abod_pred"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 43349\n",
"Name: Anomaly, dtype: int64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"abod_pred['Anomaly'].value_counts()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# cluster"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Initiated | \n",
" . . . . . . . . . . . . . . . . . . | \n",
" 16:19:48 | \n",
"
\n",
" \n",
" Status | \n",
" . . . . . . . . . . . . . . . . . . | \n",
" Fitting 0.05 Fraction | \n",
"
\n",
" \n",
" Estimator | \n",
" . . . . . . . . . . . . . . . . . . | \n",
" Clustering-Based Local Outlier | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" \n",
" \n",
"Initiated . . . . . . . . . . . . . . . . . . 16:19:48\n",
"Status . . . . . . . . . . . . . . . . . . Fitting 0.05 Fraction\n",
"Estimator . . . . . . . . . . . . . . . . . . Clustering-Based Local Outlier"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
" ... | \n",
" 57 | \n",
" 58 | \n",
" 59 | \n",
" 60 | \n",
" 61 | \n",
" 62 | \n",
" 63 | \n",
" labels | \n",
" Anomaly | \n",
" Anomaly_Score | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 101.0 | \n",
" 10110.0 | \n",
" 175.0 | \n",
" 78653.0 | \n",
" 189.0 | \n",
" 25285.0 | \n",
" 15976.0 | \n",
" 40840.0 | \n",
" 276.0 | \n",
" 31623.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 123828.159076 | \n",
"
\n",
" \n",
" 1 | \n",
" 101.0 | \n",
" 11589.0 | \n",
" 10706.0 | \n",
" 10713.0 | \n",
" 10794.0 | \n",
" 94698.0 | \n",
" 30668.0 | \n",
" 24883.0 | \n",
" 117.0 | \n",
" 23763.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 112972.396566 | \n",
"
\n",
" \n",
" 2 | \n",
" 101.0 | \n",
" 148.0 | \n",
" 30471.0 | \n",
" 10774.0 | \n",
" 13785.0 | \n",
" 13779.0 | \n",
" 33642.0 | \n",
" 14399.0 | \n",
" 48271.0 | \n",
" 76686.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 145701.165368 | \n",
"
\n",
" \n",
" 3 | \n",
" 101.0 | \n",
" 19319.0 | \n",
" 16724.0 | \n",
" 10118.0 | \n",
" 10107.0 | \n",
" 78323.0 | \n",
" 12407.0 | \n",
" 38959.0 | \n",
" 22934.0 | \n",
" 10147.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 141686.216880 | \n",
"
\n",
" \n",
" 4 | \n",
" 101.0 | \n",
" 30932.0 | \n",
" 58706.0 | \n",
" 58054.0 | \n",
" 44907.0 | \n",
" 10224.0 | \n",
" 106583.0 | \n",
" 10288.0 | \n",
" 12524.0 | \n",
" 13878.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0 | \n",
" 101399.757887 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 43344 | \n",
" 101.0 | \n",
" 20065.0 | \n",
" 10161.0 | \n",
" 115.0 | \n",
" 115.0 | \n",
" 103784.0 | \n",
" 10774.0 | \n",
" 21388.0 | \n",
" 10245.0 | \n",
" 92067.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 193403.127721 | \n",
"
\n",
" \n",
" 43345 | \n",
" 101.0 | \n",
" 139.0 | \n",
" 80839.0 | \n",
" 24109.0 | \n",
" 13406.0 | \n",
" 18985.0 | \n",
" 16285.0 | \n",
" 10163.0 | \n",
" 11062.0 | \n",
" 276.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 154821.530684 | \n",
"
\n",
" \n",
" 43346 | \n",
" 101.0 | \n",
" 105549.0 | \n",
" 102635.0 | \n",
" 10140.0 | \n",
" 26943.0 | \n",
" 11499.0 | \n",
" 110516.0 | \n",
" 21899.0 | \n",
" 11861.0 | \n",
" 10561.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 166024.182457 | \n",
"
\n",
" \n",
" 43347 | \n",
" 101.0 | \n",
" 81424.0 | \n",
" 26398.0 | \n",
" 92017.0 | \n",
" 109620.0 | \n",
" 10941.0 | \n",
" 76010.0 | \n",
" 10115.0 | \n",
" 19830.0 | \n",
" 26083.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 130852.856537 | \n",
"
\n",
" \n",
" 43348 | \n",
" 101.0 | \n",
" 39774.0 | \n",
" 11127.0 | \n",
" 45989.0 | \n",
" 24596.0 | \n",
" 11933.0 | \n",
" 170.0 | \n",
" 17145.0 | \n",
" 10710.0 | \n",
" 39125.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 206109.572124 | \n",
"
\n",
" \n",
"
\n",
"
43349 rows × 67 columns
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 \\\n",
"0 101.0 10110.0 175.0 78653.0 189.0 25285.0 15976.0 \n",
"1 101.0 11589.0 10706.0 10713.0 10794.0 94698.0 30668.0 \n",
"2 101.0 148.0 30471.0 10774.0 13785.0 13779.0 33642.0 \n",
"3 101.0 19319.0 16724.0 10118.0 10107.0 78323.0 12407.0 \n",
"4 101.0 30932.0 58706.0 58054.0 44907.0 10224.0 106583.0 \n",
"... ... ... ... ... ... ... ... \n",
"43344 101.0 20065.0 10161.0 115.0 115.0 103784.0 10774.0 \n",
"43345 101.0 139.0 80839.0 24109.0 13406.0 18985.0 16285.0 \n",
"43346 101.0 105549.0 102635.0 10140.0 26943.0 11499.0 110516.0 \n",
"43347 101.0 81424.0 26398.0 92017.0 109620.0 10941.0 76010.0 \n",
"43348 101.0 39774.0 11127.0 45989.0 24596.0 11933.0 170.0 \n",
"\n",
" 7 8 9 ... 57 58 59 60 61 62 63 \\\n",
"0 40840.0 276.0 31623.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"1 24883.0 117.0 23763.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2 14399.0 48271.0 76686.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"3 38959.0 22934.0 10147.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"4 10288.0 12524.0 13878.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... ... ... ... ... ... ... ... \n",
"43344 21388.0 10245.0 92067.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43345 10163.0 11062.0 276.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43346 21899.0 11861.0 10561.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43347 10115.0 19830.0 26083.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"43348 17145.0 10710.0 39125.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"\n",
" labels Anomaly Anomaly_Score \n",
"0 0.0 0 123828.159076 \n",
"1 0.0 0 112972.396566 \n",
"2 0.0 0 145701.165368 \n",
"3 0.0 0 141686.216880 \n",
"4 0.0 0 101399.757887 \n",
"... ... ... ... \n",
"43344 1.0 0 193403.127721 \n",
"43345 1.0 0 154821.530684 \n",
"43346 1.0 0 166024.182457 \n",
"43347 1.0 0 130852.856537 \n",
"43348 1.0 0 206109.572124 \n",
"\n",
"[43349 rows x 67 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cluster = create_model('cluster')\n",
"cluster_anomalies = assign_model(cluster)\n",
"cluster_df=embeding_df.drop(['tweet'], axis=1)\n",
"cluster_pred = predict_model(cluster, data=cluster_df)\n",
"cluster_pred"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 41182\n",
"1 2167\n",
"Name: Anomaly, dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cluster_pred['Anomaly'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"embeding_df.drop(cluster_pred.loc[cluster_pred['Anomaly']==1 ].index, inplace=True)\n",
"df=pd.DataFrame()\n",
"df['tweet']=embeding_df['tweet']\n",
"df['subtas_a']=embeding_df['labels']\n",
"df.to_csv('cluster_outliers.csv') "
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# cof"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cof = create_model('cof')\n",
"cof_anomalies = assign_model(cof)\n",
"cof_df=embeding_df.drop(['tweet'], axis=1)\n",
"cof_pred = predict_model(cof, data=cof_df)\n",
"cof_pred"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cof_pred['Anomaly'].value_counts()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# histogram"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"histogram = create_model('histogram')\n",
"histogram_anomalies = assign_model(histogram)\n",
"histogram_df=embeding_df.drop(['tweet'], axis=1)\n",
"histogram_pred = predict_model(histogram, data=histogram_df)\n",
"histogram_pred"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"histogram_pred['Anomaly'].value_counts()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# lof"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lof = create_model('lof')\n",
"lof_anomalies = assign_model(lof)\n",
"lof_df=embeding_df.drop(['tweet'], axis=1)\n",
"lof_pred = predict_model(lof, data=lof_df)\n",
"lof_pred"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lof_pred['Anomaly'].value_counts()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# svm"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lof = create_model('lof')\n",
"lof_anomalies = assign_model(lof)\n",
"lof_df=embeding_df.drop(['tweet'], axis=1)\n",
"lof_pred = predict_model(lof, data=lof_df)\n",
"lof_pred"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lof_pred['Anomaly'].value_counts()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# mcd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mcd = create_model('mcd')\n",
"mcd_anomalies = assign_model(mcd)\n",
"mcd_df=embeding_df.drop(['tweet'], axis=1)\n",
"mcd_pred = predict_model(mcd, data=mcd_df)\n",
"mcd_pred"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mcd_pred['Anomaly'].value_counts()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# sod"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sod = create_model('sod')\n",
"sod_anomalies = assign_model(sod)\n",
"sod_df=embeding_df.drop(['tweet'], axis=1)\n",
"sod_pred = predict_model(sod, data=sod_df)\n",
"sod_pred"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sod_pred['Anomaly'].value_counts()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# sos"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sos = create_model('sos')\n",
"sos_anomalies = assign_model(sos)\n",
"sos_df=embeding_df.drop(['tweet'], axis=1)\n",
"sos_pred = predict_model(sos, data=sos_df)\n",
"sos_pred"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sos_pred['Anomaly'].value_counts()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "dl_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}