Spaces:
Sleeping
Sleeping
Commit
•
cf61aa1
1
Parent(s):
ad60c0d
Update featureExtractor.py
Browse files
- featureExtractor.py +13 -12
featureExtractor.py
CHANGED
@@ -3,18 +3,19 @@ from urllib.parse import urlparse
|
|
3 |
import httpx
|
4 |
import pickle as pk
|
5 |
import pandas as pd
|
|
|
6 |
|
7 |
#Function to extract features
|
8 |
def featureExtraction(url):
|
9 |
|
10 |
features = []
|
11 |
#Address bar based features (12)
|
12 |
-
features.append(getLength(url))
|
13 |
-
features.append(getDepth(url))
|
14 |
-
features.append(tinyURL(url))
|
15 |
-
features.append(prefixSuffix(url))
|
16 |
-
features.append(no_of_dots(url))
|
17 |
-
features.append(sensitive_word(url))
|
18 |
|
19 |
|
20 |
domain_name = ''
|
@@ -25,8 +26,8 @@ def featureExtraction(url):
|
|
25 |
except:
|
26 |
dns = 1
|
27 |
|
28 |
-
features.append(1 if dns == 1 else domainAge(domain_name))
|
29 |
-
features.append(1 if dns == 1 else domainEnd(domain_name))
|
30 |
|
31 |
# HTML & Javascript based features (4)
|
32 |
dom = []
|
@@ -35,11 +36,11 @@ def featureExtraction(url):
|
|
35 |
except:
|
36 |
response = ""
|
37 |
|
38 |
-
dom.append(iframe(response))
|
39 |
-
dom.append(mouseOver(response))
|
40 |
-
dom.append(forwarding(response))
|
41 |
|
42 |
-
features.append(has_unicode(url)+haveAtSign(url)+havingIP(url))
|
43 |
|
44 |
with open('pca_model.pkl', 'rb') as file:
|
45 |
pca = pk.load(file)
|
|
|
3 |
import httpx
|
4 |
import pickle as pk
|
5 |
import pandas as pd
|
6 |
+
import extractorFunctions as ef
|
7 |
|
8 |
#Function to extract features
|
9 |
def featureExtraction(url):
|
10 |
|
11 |
features = []
|
12 |
#Address bar based features (12)
|
13 |
+
features.append(ef.getLength(url))
|
14 |
+
features.append(ef.getDepth(url))
|
15 |
+
features.append(ef.tinyURL(url))
|
16 |
+
features.append(ef.prefixSuffix(url))
|
17 |
+
features.append(ef.no_of_dots(url))
|
18 |
+
features.append(ef.sensitive_word(url))
|
19 |
|
20 |
|
21 |
domain_name = ''
|
|
|
26 |
except:
|
27 |
dns = 1
|
28 |
|
29 |
+
features.append(1 if dns == 1 else ef.domainAge(domain_name))
|
30 |
+
features.append(1 if dns == 1 else ef.domainEnd(domain_name))
|
31 |
|
32 |
# HTML & Javascript based features (4)
|
33 |
dom = []
|
|
|
36 |
except:
|
37 |
response = ""
|
38 |
|
39 |
+
dom.append(ef.iframe(response))
|
40 |
+
dom.append(ef.mouseOver(response))
|
41 |
+
dom.append(ef.forwarding(response))
|
42 |
|
43 |
+
features.append(ef.has_unicode(url)+ef.haveAtSign(url)+ef.havingIP(url))
|
44 |
|
45 |
with open('pca_model.pkl', 'rb') as file:
|
46 |
pca = pk.load(file)
|