{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import csv\n", "import pandas as pd\n", "import numpy as np\n", "import os\n", "from geopy.distance import geodesic as GD\n", "from datetime import datetime, timedelta\n", "import matplotlib" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
usernamelogin_timetyping_speeddevice_namedevice_uuidmac_addressdevice_vendordevice_modeldevice_ramip_v4...is_vpnis_proxyis_toris_relaylatlonsuburddistrictcitycountry
login_id
1lilidora2022-06-01 07:24:04201.449Lilis-Air11741651319955680:65:7c:e7:9d:8fApple, Inc.macOS-12.5.1-arm64-arm-64bit16 GB123.20.144.183...000010.740240106.699547Phường Tân HưngQuận 7Thành phố Hồ Chí MinhViệt Nam
2thaole2022-06-01 10:00:1080.167Net-desktop-43000000000000048c:7a:aa:ef:d7:04DellDell0043GB183.81.124.2...000010.772197106.661671Phường 14Quận 10Thành phố Hồ Chí MinhViệt Nam
3linhvuu2022-06-01 19:12:03192.980Linhs-MacBook-Air-2.local242919516792568c:7a:aa:f2:e5:88Apple, Inc.macOS-12.5.1-arm64-arm-64bit16 GB125.235.208.251...000010.786197106.696813Phường Đa KaoQuận 1Thành phố Hồ Chí MinhViệt Nam
\n", "

3 rows × 27 columns

\n", "
" ], "text/plain": [ " username login_time typing_speed \\\n", "login_id \n", "1 lilidora 2022-06-01 07:24:04 201.449 \n", "2 thaole 2022-06-01 10:00:10 80.167 \n", "3 linhvuu 2022-06-01 19:12:03 192.980 \n", "\n", " device_name device_uuid mac_address \\\n", "login_id \n", "1 Lilis-Air 117416513199556 80:65:7c:e7:9d:8f \n", "2 Net-desktop-4 300000000000004 8c:7a:aa:ef:d7:04 \n", "3 Linhs-MacBook-Air-2.local 24291951679256 8c:7a:aa:f2:e5:88 \n", "\n", " device_vendor device_model device_ram \\\n", "login_id \n", "1 Apple, Inc. macOS-12.5.1-arm64-arm-64bit 16 GB \n", "2 Dell Dell004 3GB \n", "3 Apple, Inc. macOS-12.5.1-arm64-arm-64bit 16 GB \n", "\n", " ip_v4 ... is_vpn is_proxy is_tor is_relay lat \\\n", "login_id ... \n", "1 123.20.144.183 ... 0 0 0 0 10.740240 \n", "2 183.81.124.2 ... 0 0 0 0 10.772197 \n", "3 125.235.208.251 ... 0 0 0 0 10.786197 \n", "\n", " lon suburd district city \\\n", "login_id \n", "1 106.699547 Phường Tân Hưng Quận 7 Thành phố Hồ Chí Minh \n", "2 106.661671 Phường 14 Quận 10 Thành phố Hồ Chí Minh \n", "3 106.696813 Phường Đa Kao Quận 1 Thành phố Hồ Chí Minh \n", "\n", " country \n", "login_id \n", "1 Việt Nam \n", "2 Việt Nam \n", "3 Việt Nam \n", "\n", "[3 rows x 27 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Set up the input database\n", "# os.getcwd()\n", "# link_csv = 'Users/USER1/Documents/GitHub/BankingATODetectionModule/streamlit_webapp/dummy_db.csv'\n", "os.chdir('/Users/USER1/Documents/GitHub/BankingATODetectionModule/streamlit_webapp')\n", "db = pd.read_csv('dummy_db.csv',index_col='login_id')\n", "db.head(3)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# convert the data\n", "db = db.astype({\"device_uuid\":\"str\"})" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['username', 'login_time', 'typing_speed', 'device_name', 'device_uuid',\n", " 'mac_address', 'device_vendor', 'device_model', 'device_ram', 'ip_v4',\n", " 'ip_country', 'ip_region', 'ip_city', 'ip_lat', 'ip_lon', 'isp_name',\n", " 'isp_org', 'is_vpn', 'is_proxy', 'is_tor', 'is_relay', 'lat', 'lon',\n", " 'suburd', 'district', 'city', 'country'],\n", " dtype='object')" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "db.columns" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "fields = ['username', 'login_time', 'typing_speed', 'device_name', 'device_uuid',\n", " 'mac_address', 'device_vendor', 'device_model', 'device_ram', 'ip_v4',\n", " 'ip_country', 'ip_region', 'ip_city', 'ip_lat', 'ip_lon', 'isp_name',\n", " 'isp_org', 'is_vpn', 'is_proxy', 'is_tor', 'is_relay', 'lat', 'lon',\n", " 'suburd', 'district', 'city', 'country']" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "usecase1 = {'username': 'linhvuu',\n", " 'login_time': '2022-06-01 19:12:03',\n", " 'typing_speed': 178.675,\n", " 'device_name': 'Linhs-MacBook-Air-2.local',\n", " 'device_uuid': '24291951679256',\n", " 'mac_address': '8c:7a:aa:f2:e5:88',\n", " 'device_vendor': 'Apple, Inc.',\n", " 'device_model': 'macOS-12.5.1-arm64-arm-64bit',\n", " 'device_ram': '16 GB',\n", " 'ip_v4': '125.235.208.251',\n", " 'ip_country': 'Vietnam',\n", " 'ip_region': 'Ho Chi Minh',\n", " 'ip_city': 'Ho Chi Minh City',\n", " 'ip_lat': 10.75,\n", " 'ip_lon': 106.666672,\n", " 'isp_name': 'Viettel Corporation',\n", " 'isp_org': 'Viettel Corporation',\n", " 'is_vpn': 0,\n", " 'is_proxy': 0,\n", " 'is_tor': 0,\n", " 'is_relay': 0,\n", " 'lat': 10.78619741,\n", " 'lon': 106.6968128,\n", " 'suburd': 'Phường Đa Kao',\n", " 'district': 'Quận 1',\n", " 'city': 'Thành phố Hồ Chí Minh',\n", " 'country': 'Việt Nam'}" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "lilidora\n", "thaole\n" ] } ], "source": [ "usecase3 = db.to_dict(orient='records')[0]\n", "usecase2 = db.to_dict(orient='records')[1]\n", "print(usecase3['username'])\n", "print(usecase2['username'])" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'device_uuid': 40, 'mac_address': 40, 'device_name': 30.0, 'device_model': 20.0, 'device_vendor': 4.0, 'ip_v4': 30, 'isp_name': 15.0, 'ip_country': 3.0, 'suburd': 30, 'district': 22.5, 'city': 15.0, 'country': 3.0}\n" ] } ], "source": [ "# lookup weight\n", "# total score = 100\n", "device_max = 40\n", "ip_max = 30\n", "location_max = 30\n", "wt = {'H':1,'MH':0.75,'M':0.5,'L':0.1}\n", "w_item = {\n", " 'device_uuid': 'H',\n", " 'mac_address': 'H',\n", " 'device_name': 'MH', \n", " 'device_model': 'M',\n", " 'device_vendor': 'L',\n", "\n", " 'ip_v4': 'H',\n", " 'isp_name': 'M',\n", " 'ip_country': 'L',\n", "\n", " 'suburd': 'H',\n", " 'district': 'MH',\n", " 'city': 'M',\n", " 'country': 'L'}\n", " \n", "weight = {}\n", "for i in w_item.keys():\n", " if i in [ 'ip_country', 'isp_name','ip_v4']:\n", " weight[i] = wt[w_item[i]] * ip_max\n", " elif i in ['country','city', 'district','suburd']:\n", " weight[i] = wt[w_item[i]] * location_max\n", " else:\n", " weight[i] = wt[w_item[i]] * device_max\n", "print(weight)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "40" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "weight['device_uuid']" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "Step to do with the logic\n", "- General ideas: 1 new transaction => compare with historical data of this user\n", "- List of def:\n", " 1. Def check match for each group (location, ip, device): compare from the lowest filed country => city => district => suburb => get the score\n", " 1.1 Def check match: return the highest field matched\n", " 1.2 Def lookup_weight: from the match get the score\n", " 1.1.2 Def to check the percentage of this value in total values\n", " \n", " 2. Def to check score_jumping: velocity (H: 600+, M: 80 - 600, S: 40 -80) and frequency_jumping (rare or ussually)\n", " 2.1 Def cal all velocity\n", " 3. Def to add-in conditions:\n", " 3.1 check vpn (new IP + vpn)\n", " 3.2 Check typing speed (IQR)\n", " 4. Def to get_score:\n", " => The score = score_device * per_app + score_ip * per_app + score_location * per_app (- score_jumping - typing_speed - vpn_check: new + vpn)\n", " 5. Def to return result: compare with a threshold and pass or fail\n", "\n", "Next steps: model de thay dooi trong so???\n", "Làm sao cái mặt tĩnh (liệu có bắt được không)\n", "\n", "Feedback:\n", "- Noi đây là demo (sẽ tích hợp vào app ngân hàng) .\n", "- Use case: pass + fail + travel new device => Giới thiệu thêm những cái khác mình có thể làm được?\n", "- Demo đơn giản (present không \n", "Nhấn mạnh:\n", "- Tại sao: (không ảnh hưởng UIUX, privatcy, không thay đổi thói quen người dùng) cái mình làm được + và không làm mất cái khác\n", "- Tại sao cần team mình: chuyên sâu và tiết kiệm, resource\n", "\n", "Ngân hàng có muốn share data với mình không? khả thi là mình tự collect (không sensitive, có sensor)\n", "Vài câu hỏi về biz: tính tiền làm sao, có đổi thủ không?" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "40" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#1.2 to get the weight of each value\n", "match = 'device_uuid'\n", "weight[match]" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "range is from 44.0 to 55\n" ] }, { "data": { "text/plain": [ "('device_uuid', 1)" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 1.1 to check match for each group \n", "user = usecase1\n", "location_fields = [ 'country','city', 'district','suburd']\n", "input_u = {'suburd': 'Phường 5',\n", " 'district': 'Quận 999',\n", " 'city': 'Thành phố Hồ Chí Minh',\n", " 'country': 'Việt Nam'}\n", "user_db = db[db.username==user['username']]\n", "def check_match_per(user,user_db,check = 'loacation'):\n", " \"\"\"\n", " input the check is one of 'location','device','ip'\n", " \"\"\"\n", " match_value = ''\n", " per = 0\n", " total_txn = len(user_db)\n", " if check == 'location':\n", " fields_check = ['country','city', 'district','suburd']\n", " elif check == 'device':\n", " fields_check = ['device_vendor','device_model','device_name','mac_address','device_uuid']\n", " else:\n", " fields_check = [ 'ip_country', 'isp_name','ip_v4',] \n", " for i in fields_check:\n", " count = len(user_db[user_db[i] == user[i]])\n", " if count > 0:\n", " # if user[i] in user_db[i].values and user[i] != '':\n", " match_value = i\n", " per = count/total_txn\n", "\n", " elif i == 'mac_address':\n", " continue\n", " else:\n", " break\n", " # print(f'Match value is {match_value}')\n", " final_per = get_right_per(user_dict,user_db,match_value,per)\n", " return match_value,final_per\n", "\n", "check_match_per(user,user_db,check='device')" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'match_value' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn [52], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m get_right_per(user_dict,user_db,\u001b[43mmatch_value\u001b[49m,per)\n", "\u001b[0;31mNameError\u001b[0m: name 'match_value' is not defined" ] } ], "source": [ "get_right_per(user_dict,user_db,match_value,per)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "# to get per of the highest points\n", "# get all the values with count in range ~20% less than the highest\n", "def get_right_per(user_dict,user_db,match_value,per):\n", " range_highest = 0.2\n", " count_all = user_db.groupby(match_value).count().login_time\n", " up = count_all.max()\n", " down = up*(1-range_highest)\n", " print(f'range is from {down} to {up}')\n", " if count_all[user_dict[match_value]] >= down and count_all[user_dict[match_value]] <= up:\n", " return 1\n", " else:\n", " return per" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# 2.1 Get velocity of all transactions\n", "def get_vel_all(row):\n", " # print(row)\n", " dist = 0\n", " coor = (row['lat'],row['lon'])\n", " coor_pre = (row['pre_lat'],row['pre_lon'])\n", " interval = ''\n", " vel = ''\n", " if coor_pre != (0,0):\n", " dist = GD(coor,coor_pre).km\n", " interval = (row['login_time'] - row['pre_time']).total_seconds()/(60*60)\n", " # interval = (row['login_time'] - row['pre_time']).days\n", " vel = dist/interval\n", " return vel" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/l8/r1y7_13x2mq324h61cc4gk4r0000gp/T/ipykernel_70337/224041067.py:2: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " user_db.login_time = pd.to_datetime(user_db.login_time)\n", "/var/folders/l8/r1y7_13x2mq324h61cc4gk4r0000gp/T/ipykernel_70337/224041067.py:3: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " user_db['pre_lat'] = user_db['lat'].shift(periods=1, fill_value=0)\n", "/var/folders/l8/r1y7_13x2mq324h61cc4gk4r0000gp/T/ipykernel_70337/224041067.py:4: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " user_db['pre_lon'] = user_db['lon'].shift(periods=1, fill_value=0)\n", "/var/folders/l8/r1y7_13x2mq324h61cc4gk4r0000gp/T/ipykernel_70337/224041067.py:5: FutureWarning: Passing to shift is deprecated and will raise in a future version, pass Timestamp instead.\n", " user_db['pre_time'] = user_db['login_time'].shift(periods=1,fill_value=0)\n", "/var/folders/l8/r1y7_13x2mq324h61cc4gk4r0000gp/T/ipykernel_70337/224041067.py:5: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " user_db['pre_time'] = user_db['login_time'].shift(periods=1,fill_value=0)\n", "/var/folders/l8/r1y7_13x2mq324h61cc4gk4r0000gp/T/ipykernel_70337/224041067.py:6: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " user_db['vel'] = user_db.apply(lambda x: get_vel_all(x),axis=1)\n" ] } ], "source": [ "# 2. Def to check score_jumping: velocity (H: 600+, M: 80 - 600, S: 40<80) and frequency_jumping (rare or ussually)\n", "user_db.login_time = pd.to_datetime(user_db.login_time)\n", "user_db['pre_lat'] = user_db['lat'].shift(periods=1, fill_value=0)\n", "user_db['pre_lon'] = user_db['lon'].shift(periods=1, fill_value=0)\n", "user_db['pre_time'] = user_db['login_time'].shift(periods=1,fill_value=0)\n", "user_db['vel'] = user_db.apply(lambda x: get_vel_all(x),axis=1)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-0.003112011442430221" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 2.2 get vel of the latest txn\n", "user\n", "\n", "def get_vel_txn(usecase,user_db):\n", " \"\"\"\n", " get the velocity of the new transaction in usecase and the latest transaction in user's history\n", " \"\"\"\n", " dist = 0\n", " interval = ''\n", " latest_txn = user_db.iloc[[-1]]\n", " coor_txn = (usecase['lat'],usecase['lon'])\n", " coor_latest = (float(latest_txn['lat']),float(latest_txn['lon']))\n", " dist = GD(coor_txn,coor_latest).km\n", "\n", " time_txn = datetime.strptime(user['login_time'],'%Y-%m-%d %H:%M:%S')\n", " interval = (time_txn - latest_txn['login_time'].to_list()[0]).total_seconds()/(60*60)\n", " try: \n", " vel = dist/interval\n", " except:\n", " vel = 0\n", " return(vel)\n", " # print(vel)\n", "\n", "get_vel_txn(user,user_db)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'username': 'linhvuu',\n", " 'login_time': '2022-06-01 19:12:03',\n", " 'typing_speed': 178.675,\n", " 'device_name': 'Linhs-MacBook-Air-2.local',\n", " 'device_uuid': '24291951679256',\n", " 'mac_address': '8c:7a:aa:f2:e5:88',\n", " 'device_vendor': 'Apple, Inc.',\n", " 'device_model': 'macOS-12.5.1-arm64-arm-64bit',\n", " 'device_ram': '16 GB',\n", " 'ip_v4': '125.235.208.251',\n", " 'ip_country': 'Vietnam',\n", " 'ip_region': 'Ho Chi Minh',\n", " 'ip_city': 'Ho Chi Minh City',\n", " 'ip_lat': 10.75,\n", " 'ip_lon': 106.666672,\n", " 'isp_name': 'Viettel Corporation',\n", " 'isp_org': 'Viettel Corporation',\n", " 'is_vpn': 0,\n", " 'is_proxy': 0,\n", " 'is_tor': 0,\n", " 'is_relay': 0,\n", " 'lat': 10.78619741,\n", " 'lon': 106.6968128,\n", " 'suburd': 'Phường Đa Kao',\n", " 'district': 'Quận 1',\n", " 'city': 'Thành phố Hồ Chí Minh',\n", " 'country': 'Việt Nam'}" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "user_dict= user" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# 2 to get score will be reduced because of jumping\n", "def get_score_jump (user_dict,user_db):\n", " threshold_vel = {\n", " 'H':600,\n", " 'M':80,\n", " 'L':40,\n", " 'frequency':0.1\n", " }\n", " weight_vel = {\n", " 'H':20,\n", " 'M':10,\n", " \"L\":5\n", " }\n", " many_vel = {\n", " 'Y':0.1,\n", " 'N':1\n", " }\n", "\n", " # to get how many jumping - low or not\n", " count_jump = len(user_db[user_db.apply(lambda x: float(x.vel) > threshold_vel['L'] if x.vel != \"\" else False,axis=1)])\n", "\n", " if count_jump > len(user_db)*threshold_vel['frequency']:\n", " many_jump = 'Y'\n", " else:\n", " many_jump = 'N'\n", "\n", " vel_txn = float(get_vel_txn(user_dict,user_db))\n", "\n", "\n", " if vel_txn > threshold_vel['H']:\n", " score_jump = many_vel[many_jump] * weight_vel['H']\n", " elif vel_txn > threshold_vel['M']:\n", " score_jump = many_vel[many_jump] * weight_vel['M']\n", " elif vel_txn > threshold_vel['L']:\n", " score_jump = many_vel[many_jump] * weight_vel['L']\n", " else:\n", " score_jump = 0\n", "\n", " return(score_jump)\n" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_score_jump (user_dict,user_db)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Match value is ip_v4\n" ] }, { "data": { "text/plain": [ "0" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 3.1 check vpn (new IP + vpn)\n", "\n", "\n", "def get_vpn_score (user_dict,user_db):\n", " vpn_fields = ['is_vpn',\n", " 'is_proxy',\n", " 'is_tor',\n", " 'is_relay']\n", " weight_vpn = 10\n", " vpn_count = 0\n", " for i in vpn_fields:\n", " vpn_count += user[i]\n", " if check_match_per(user_dict,user_db,check = 'ip')[0] != 'ip_v4' and vpn_count > 0:\n", " return weight_vpn\n", " else:\n", " return 0\n", "\n", "get_vpn_score(user,user_db)\n" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 3.2 Check typing speed (IQR)\n", "weight_typing = 10\n", "def check_typing_speed (user,user_db):\n", " Q1 = np.percentile(user_db.typing_speed,25)\n", " Q3 = np.percentile(user_db.typing_speed,75)\n", " IQR = Q3-Q1\n", " # print(IQR)\n", " ll = Q1 - 1.5*IQR\n", " ul = Q3 + 1.5*IQR\n", " if user['typing_speed'] < ll or user['typing_speed'] > ul:\n", " return weight_typing\n", " else:\n", " return 0\n", "\n", "typing_score = check_typing_speed(user,user_db)\n", "# typing_score = check_typing_speed(test,user_db)\n", "typing_score" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'username': 'linhvuu',\n", " 'login_time': '2022-06-01 19:12:03',\n", " 'typing_speed': 178.675,\n", " 'device_name': 'Linhs-MacBook-Air-2.local',\n", " 'device_uuid': '24291951679256',\n", " 'mac_address': '8c:7a:aa:f2:e5:88',\n", " 'device_vendor': 'Apple, Inc.',\n", " 'device_model': 'macOS-12.5.1-arm64-arm-64bit',\n", " 'device_ram': '16 GB',\n", " 'ip_v4': '125.235.208.251',\n", " 'ip_country': 'Vietnam',\n", " 'ip_region': 'Ho Chi Minh',\n", " 'ip_city': 'Ho Chi Minh City',\n", " 'ip_lat': 10.75,\n", " 'ip_lon': 106.666672,\n", " 'isp_name': 'Viettel Corporation',\n", " 'isp_org': 'Viettel Corporation',\n", " 'is_vpn': 0,\n", " 'is_proxy': 0,\n", " 'is_tor': 0,\n", " 'is_relay': 0,\n", " 'lat': 10.78619741,\n", " 'lon': 106.6968128,\n", " 'suburd': 'Phường Đa Kao',\n", " 'district': 'Quận 1',\n", " 'city': 'Thành phố Hồ Chí Minh',\n", " 'country': 'Việt Nam'}" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 4. Def to get_score:\n", "# => The score = score_device * per_app + score_ip * per_app + score_location * per_app (- score_jumping - typing_speed - vpn_check: new + vpn)\n", "user" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
usernamelogin_timetyping_speeddevice_namedevice_uuidmac_addressdevice_vendordevice_modeldevice_ramip_v4...is_vpnis_proxyis_toris_relaylatlonsuburddistrictcitycountry
login_id
3linhvuu2022-06-01 19:12:03192.980Linhs-MacBook-Air-2.local242919516792568c:7a:aa:f2:e5:88Apple, Inc.macOS-12.5.1-arm64-arm-64bit16 GB125.235.208.251...000010.786197106.696813Phường Đa KaoQuận 1Thành phố Hồ Chí MinhViệt Nam
7linhvuu2022-06-03 01:21:01133.234Linhs-Samsung10000000000000028c:7a:aa:f2:e5:02SamsungSamsung0012GB18.162.78.188...100010.737127106.660850Phường 5Quận 8Thành phố Hồ Chí MinhViệt Nam
10linhvuu2022-06-04 17:04:16170.171Linhs-MacBook-Air-2.local242919516792568c:7a:aa:f2:e5:88Apple, Inc.macOS-12.5.1-arm64-arm-64bit16 GB27.65.195.158...000010.737217106.661484Phường 5Quận 8Thành phố Hồ Chí MinhViệt Nam
\n", "

3 rows × 27 columns

\n", "
" ], "text/plain": [ " username login_time typing_speed \\\n", "login_id \n", "3 linhvuu 2022-06-01 19:12:03 192.980 \n", "7 linhvuu 2022-06-03 01:21:01 133.234 \n", "10 linhvuu 2022-06-04 17:04:16 170.171 \n", "\n", " device_name device_uuid mac_address \\\n", "login_id \n", "3 Linhs-MacBook-Air-2.local 24291951679256 8c:7a:aa:f2:e5:88 \n", "7 Linhs-Samsung 1000000000000002 8c:7a:aa:f2:e5:02 \n", "10 Linhs-MacBook-Air-2.local 24291951679256 8c:7a:aa:f2:e5:88 \n", "\n", " device_vendor device_model device_ram \\\n", "login_id \n", "3 Apple, Inc. macOS-12.5.1-arm64-arm-64bit 16 GB \n", "7 Samsung Samsung001 2GB \n", "10 Apple, Inc. macOS-12.5.1-arm64-arm-64bit 16 GB \n", "\n", " ip_v4 ... is_vpn is_proxy is_tor is_relay lat \\\n", "login_id ... \n", "3 125.235.208.251 ... 0 0 0 0 10.786197 \n", "7 18.162.78.188 ... 1 0 0 0 10.737127 \n", "10 27.65.195.158 ... 0 0 0 0 10.737217 \n", "\n", " lon suburd district city country \n", "login_id \n", "3 106.696813 Phường Đa Kao Quận 1 Thành phố Hồ Chí Minh Việt Nam \n", "7 106.660850 Phường 5 Quận 8 Thành phố Hồ Chí Minh Việt Nam \n", "10 106.661484 Phường 5 Quận 8 Thành phố Hồ Chí Minh Việt Nam \n", "\n", "[3 rows x 27 columns]" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_db.head(3)" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [], "source": [ "user = {'username': 'lilidora',\n", " 'login_time': '2022-06-01 07:24:04',\n", " 'typing_speed': 201.449,\n", " 'device_name': 'Lilkjkis-Air',\n", " 'device_uuid': '11741651319fdsf9556',\n", " 'mac_address': '80:65:7csdfds:e7:9d:8f',\n", " 'device_vendor': 'Apple, Inc.',\n", " 'device_model': 'macOS-12.5.1-arm64-arm-64bit',\n", " 'device_ram': '16 GB',\n", " 'ip_v4': '123.20.fsdf44.183',\n", " 'ip_country': 'Vietnam',\n", " 'ip_region': 'Ho Chi Minh',\n", " 'ip_city': 'Ho Chi Minh City',\n", " 'ip_lat': 10.8326,\n", " 'ip_lon': 106.6581,\n", " 'isp_name': 'VietNam Post and Telecom Corporation',\n", " 'isp_org': '0',\n", " 'is_vpn': 0,\n", " 'is_proxy': 0,\n", " 'is_tor': 0,\n", " 'is_relay': 0,\n", " 'lat': 10.74024034,\n", " 'lon': 106.6995472,\n", " 'suburd': 'Phườngdfs Tân Hưng',\n", " 'district': 'Quận 7',\n", " 'city': 'Thành phố Hồ Chí Minh',\n", " 'country': 'Việt Nam'}" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "lilidora\n", "device_model 12.0\n", "0.6\n", "device_model\n", "isp_name 12.75\n", "district 11.25\n", "total 36.0\n" ] } ], "source": [ "# user = usecase3\n", "user_db = db[db.username == user['username']]\n", "\n", "# 4.Get score\n", "def get_risk_score (user_dict,user_db):\n", " weight = {'device_uuid': 40, 'mac_address': 40, 'device_name': 30.0, 'device_model': 20.0, \\\n", "\t\t\t\t\t\t'device_vendor': 4.0, 'ip_v4': 30, 'isp_name': 15.0, 'ip_country': 3.0, 'suburb': 30, 'district': 22.5, \\\n", "\t\t\t\t\t\t\t'city': 15.0, 'country': 3.0}\n", " # check device\n", " device_match,device_per = check_match_per(user_dict,user_db,check='device')\n", " device_score = weight[device_match] * device_per\n", "\n", " # check ip\n", " ip_match,ip_per = check_match_per(user_dict,user_db,check='ip')\n", " ip_score = weight[ip_match] * ip_per\n", "\n", "\n", " # check location\n", " location_match,location_per = check_match_per(user_dict,user_db,check='location')\n", " location_score = weight[location_match] * location_per\n", " return device_score+ip_score+location_score\n", "\n", "\n", " print(f'total {device_score+ip_score+location_score}')" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "device_uuid\n", "1000000000000002 45\n", "24291951679256 55\n", "Name: login_time, dtype: int64" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_db.groupby('device_uuid').count().login_time" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "device_uuid\n", "1000000000000002 45\n", "24291951679256 55\n", "Name: login_time, dtype: int64" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "db[db.username=='linhvuu'].groupby('device_uuid').count().login_time" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "device_uuid\n", "121577554615100 60\n", "300000000000003 25\n", "300000000000004 15\n", "Name: login_time, dtype: int64" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "db[db.username=='thaole'].groupby('device_uuid').count().login_time" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "device_uuid\n", "117416513199556 60\n", "20000000000002 40\n", "Name: login_time, dtype: int64" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "db[db.username=='lilidora'].groupby('device_uuid').count().login_time\n", "# highest and 105" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "user_dist = user" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "55" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "count_all[user_dist['device_uuid']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.10.7 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.7" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" } } }, "nbformat": 4, "nbformat_minor": 2 }