Hussain033 committed on
Commit
cf61aa1
1 Parent(s): ad60c0d

Update featureExtractor.py

Browse files
Files changed (1) hide show
  1. featureExtractor.py +13 -12
featureExtractor.py CHANGED
@@ -3,18 +3,19 @@ from urllib.parse import urlparse
3
  import httpx
4
  import pickle as pk
5
  import pandas as pd
 
6
 
7
  #Function to extract features
8
  def featureExtraction(url):
9
 
10
  features = []
11
  #Address bar based features (12)
12
- features.append(getLength(url))
13
- features.append(getDepth(url))
14
- features.append(tinyURL(url))
15
- features.append(prefixSuffix(url))
16
- features.append(no_of_dots(url))
17
- features.append(sensitive_word(url))
18
 
19
 
20
  domain_name = ''
@@ -25,8 +26,8 @@ def featureExtraction(url):
25
  except:
26
  dns = 1
27
 
28
- features.append(1 if dns == 1 else domainAge(domain_name))
29
- features.append(1 if dns == 1 else domainEnd(domain_name))
30
 
31
  # HTML & Javascript based features (4)
32
  dom = []
@@ -35,11 +36,11 @@ def featureExtraction(url):
35
  except:
36
  response = ""
37
 
38
- dom.append(iframe(response))
39
- dom.append(mouseOver(response))
40
- dom.append(forwarding(response))
41
 
42
- features.append(has_unicode(url)+haveAtSign(url)+havingIP(url))
43
 
44
  with open('pca_model.pkl', 'rb') as file:
45
  pca = pk.load(file)
 
3
  import httpx
4
  import pickle as pk
5
  import pandas as pd
6
+ import extractorFunctions as ef
7
 
8
  #Function to extract features
9
  def featureExtraction(url):
10
 
11
  features = []
12
  #Address bar based features (12)
13
+ features.append(ef.getLength(url))
14
+ features.append(ef.getDepth(url))
15
+ features.append(ef.tinyURL(url))
16
+ features.append(ef.prefixSuffix(url))
17
+ features.append(ef.no_of_dots(url))
18
+ features.append(ef.sensitive_word(url))
19
 
20
 
21
  domain_name = ''
 
26
  except:
27
  dns = 1
28
 
29
+ features.append(1 if dns == 1 else ef.domainAge(domain_name))
30
+ features.append(1 if dns == 1 else ef.domainEnd(domain_name))
31
 
32
  # HTML & Javascript based features (4)
33
  dom = []
 
36
  except:
37
  response = ""
38
 
39
+ dom.append(ef.iframe(response))
40
+ dom.append(ef.mouseOver(response))
41
+ dom.append(ef.forwarding(response))
42
 
43
+ features.append(ef.has_unicode(url)+ef.haveAtSign(url)+ef.havingIP(url))
44
 
45
  with open('pca_model.pkl', 'rb') as file:
46
  pca = pk.load(file)