allenchienxxx commited on
Commit
01cbf36
1 Parent(s): 0cc4e8f

Upload 13 files

Browse files
analze.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules import *
2
+ from pathlib import Path
3
+ import pandas as pd
4
+ from flask import Flask, render_template, request
5
+ import nltk
6
+ import pickle
7
+ from nltk.corpus import stopwords
8
+ from nltk.stem import WordNetLemmatizer
9
+ from joblib import load
10
+ import sklearn
11
+ import ssl
12
+
13
+ try:
14
+ _create_unverified_https_context = ssl._create_unverified_context
15
+ except AttributeError:
16
+ pass
17
+ else:
18
+ ssl._create_default_https_context = _create_unverified_https_context
19
+ # nltk.download('stopwords')
20
+ # nltk.download('punkt')
21
+ # nltk.download('omw-1.4')
22
+ # nltk.download('wordnet')
23
+
24
def check_file_type(file):
    """Save the upload when it is a .eml or .txt file; return a status message."""
    suffix = Path(file.filename).suffix.lower()
    if suffix in ('.eml', '.txt'):
        save_file(file)
        return 'Extracted Features'
    return "Please select .eml or .txt file."
32
+
33
def save_file(file):
    """Persist an uploaded file under 'email files/' as UTF-8 text.

    NOTE(review): file.filename is attacker-controlled; it should be
    sanitized (e.g. ``Path(file.filename).name``) before being joined into
    a path.  Left unchanged here because other call sites build the same
    path string and must stay in sync.
    """
    file_path = 'email files/' + file.filename
    # Explicit encoding: the platform default may not be able to represent
    # the UTF-8 decoded payload, which would raise UnicodeEncodeError.
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(file.read().decode('utf-8'))
37
+
38
def text_feature(filepath):
    """Return a one-row DataFrame with the email's plain text in column 'text'.

    Bug fixed: the original only assigned ``textlist`` inside the
    ``if text != ""`` branch, so an email with no text content raised
    NameError; an empty string is used for that case instead.
    """
    text = get_text(filepath)
    # Collapse all whitespace runs to single spaces.
    textlist = ' '.join(text.split()) if text != "" else ""
    return pd.DataFrame([[textlist]], columns=['text'])
46
+
47
def html_tags_feature(filepath):
    """Return a one-row DataFrame with space-joined HTML tag names in 'tags'.

    Bug fixed: when no tags were found the original stored an empty *list*
    in the cell, giving the column a mixed str/list type that the
    downstream count-vectorizer (which expects string documents) cannot
    transform; an empty string is stored instead.
    """
    tags = get_tags_from_html(get_html_general(filepath))
    taglist = ' '.join(tags) if tags else ''
    return pd.DataFrame([[taglist]], columns=['tags'])
52
+
53
def extra_feature(filepath):
    """Build a one-row DataFrame of header- and HTML-derived features.

    Values are normalised so the classifier sees only numbers:
    None -> 0, True -> 1, False -> 0; ints pass through unchanged.
    """
    raw = [
        check_spf(filepath),
        check_dkim(filepath),
        check_dmarc(filepath),
        check_deliver_receiver(filepath),
        check_encript(filepath),
        get_onclicks(filepath),
        check_popWindow(filepath),
    ]
    cleaned = []
    for value in raw:
        if value is None or value is False:
            cleaned.append(0)
        elif value is True:
            cleaned.append(1)
        else:
            cleaned.append(value)
    return pd.DataFrame([cleaned], columns=[
        'SPF(Pass:1,Neutral:2,Softdail:3,None:0)', 'DKIM', 'DMARC',
        'Deliver-to Matches Receiver', 'Message_encrtpted',
        'Onclick_events', 'Popwindow'])
68
+
69
def num_feature(filepath):
    """Build a one-row DataFrame of numeric body/subject/URL features.

    Missing values (None) are replaced by 0 before the frame is built.
    """
    row = [
        get_body_richness(filepath),
        get_num_FunctionWords(filepath),
        get_sbj_richness(filepath),
        get_num_urls(filepath),
        get_num_urls_ip(filepath),
        get_num_image_urls(filepath),
        get_num_domain_urls(filepath),
        get_num_url_ports(filepath),
        get_chars_sender(filepath),
    ]
    row = [0 if value is None else value for value in row]
    return pd.DataFrame([row], columns=[
        'body richness', 'Include function words', 'Subject richness',
        'Numers of URLs', 'IPURLs', 'ImageURLs',
        'DomainURLs', 'URLs contain port information', 'Characters in senders'])
85
def get_features(filepath):
    """Concatenate text, tag, numeric and extra feature frames side by side."""
    frames = [
        text_feature(filepath),       # raw email text
        html_tags_feature(filepath),  # html tag names
        num_feature(filepath),        # numeric features
        extra_feature(filepath),      # header-derived features
    ]
    return pd.concat(frames, axis=1)
98
+
99
+
100
def predict_content(content):
    """Classify the text feature frame; a 'ham' label maps to Legitimate."""
    clf = load("save_models/SVM_finalcontent.pkl")
    label = clf.predict(preprocess_content(content))[0]
    return "Legitimate" if label == 'ham' else "Phishing"
104
+
105
def predict_html(html_tag):
    """Classify the HTML-tag feature frame; a 'ham' label maps to Legitimate."""
    clf = load("save_models/Stack_tag.pkl")
    label = clf.predict(preprocess_html(html_tag))[0]
    return "Legitimate" if label == 'ham' else "Phishing"
109
+
110
def predict_num(num_df):
    """Classify the numeric feature frame; a 'ham' label maps to Legitimate."""
    clf = load("save_models/RF_Num.pkl")
    label = clf.predict(preprocess_num(num_df))[0]
    return "Legitimate" if label == 'ham' else "Phishing"
114
+
115
def predict_extra(extra_df):
    """Classify the extra feature frame; a 'ham' label maps to Legitimate."""
    clf = load("save_models/RF_extra.pkl")
    label = clf.predict(preprocess_extra(extra_df))[0]
    return "Legitimate" if label == 'ham' else "Phishing"
119
+
120
def preprocess_content(content):
    """Vectorize raw text with the persisted TF-IDF model."""
    with open('vectorizer/content_tfidf.pickle', 'rb') as fh:
        vectorizer = pickle.load(fh)
    return vectorizer.transform(content)
126
+
127
def preprocess_html(html_tag):
    """Vectorize HTML tag strings with the persisted count-vectorizer."""
    with open('vectorizer/html_cv.pickle', 'rb') as fh:
        vectorizer = pickle.load(fh)
    return vectorizer.transform(html_tag)
132
+
133
def preprocess_num(num_df):
    """Scale the numeric feature frame with the persisted scaler."""
    with open('vectorizer/num_scaler.pkl', 'rb') as fh:
        scaler = pickle.load(fh)
    return scaler.transform(num_df.values)
138
+
139
def preprocess_extra(extra_df):
    """Scale the extra feature frame with the persisted scaler."""
    with open('vectorizer/extra_scaler.pkl', 'rb') as fh:
        scaler = pickle.load(fh)
    return scaler.transform(extra_df.values)
144
+
145
+
146
lemmatizer = WordNetLemmatizer()


def customtokenize(text):
    """Tokenize *text*, drop English stopwords, and lemmatize the rest.

    Fixes: the parameter shadowed the builtin ``str``, and
    ``stopwords.words('english')`` was re-built and scanned linearly for
    every token — the stopword list is now loaded once into a set for
    O(1) membership tests.
    """
    tokens = nltk.word_tokenize(text)
    stop_set = set(stopwords.words('english'))
    kept = [token for token in tokens if token not in stop_set]
    return [lemmatizer.lemmatize(word) for word in kept]
main.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from analze import *
2
+
3
+ app = Flask(__name__)
4
+
5
@app.route('/')
def home():
    """Serve the landing page."""
    return render_template('home.html')
8
+
9
+
10
@app.route('/upload', methods=['GET', 'POST'])
def upload_file():
    """Accept an email upload, extract features, and render predictions.

    Bug fixed: the original called get_features()/predict_*() even when
    check_file_type() rejected the extension — in that case the file was
    never saved, so feature extraction crashed on a missing path.  The
    rejection message is now returned before any extraction runs.
    """
    if request.method == 'POST':
        # Check if a file was uploaded at all.
        if 'file' not in request.files:
            return render_template('home.html', content='No file uploaded.')
        file = request.files['file']
        # Check if the file has a filename.
        if file.filename == '':
            return render_template('home.html', content='No file selected.')
        message = check_file_type(file)
        if message != 'Extracted Features':
            # Unsupported extension: nothing was saved, skip extraction.
            return render_template('home.html', content=message)
        filepath = 'email files/' + file.filename
        return render_template('home.html',
                               content=message,
                               features=get_features(filepath),
                               pre_content=predict_content(text_feature(filepath)),
                               pre_tag=predict_html(html_tags_feature(filepath)),
                               pre_num=predict_num(num_feature(filepath)),
                               pre_extra=predict_extra(extra_feature(filepath)))

    return render_template('home.html')
30
+
31
+
32
+
33
if __name__ == '__main__':
    # Listen on all interfaces so the app is reachable from outside the host.
    app.run(host='0.0.0.0', port=8000)
modules.py ADDED
@@ -0,0 +1,377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def get_text_from_html(html_content):
    """Extract the visible text from an HTML string, collapsing whitespace."""
    parsed = BeautifulSoup(html_content, 'html.parser')
    # Collapse every whitespace run (newlines, tabs, …) to one space.
    return re.sub(r"\s+", " ", parsed.get_text())
8
+ # get text content type from email
9
def get_text(file_path):
    """Return the concatenated text/plain payload of an email, newlines removed.

    Bug fixed: the original returned unconditionally before its
    HTML-fallback check, making that branch unreachable; when no plain
    text exists the text extracted from the HTML part is now returned,
    as the dead code intended.
    """
    with open(file_path, 'rb') as file:
        message = email.message_from_bytes(file.read())
    text_content = ""
    for part in message.walk():
        if part.get_content_type() == 'text/plain':
            text_content += part.get_payload(decode=True).decode('iso-8859-1')
    if text_content == "":
        # No plain-text part: fall back to text scraped from the HTML body.
        return get_text_from_html(get_html_general(file_path))
    return text_content.replace("\n", "")
20
+ from bs4 import BeautifulSoup
21
+ import email
22
def get_email_html(file_path):
    """Return the concatenated text/html payload of an email, or "" if none."""
    with open(file_path, 'rb') as fh:
        message = email.message_from_bytes(fh.read())
    parts = [part.get_payload(decode=True).decode('iso-8859-1')
             for part in message.walk()
             if part.get_content_type() == 'text/html']
    return "".join(parts)
36
+
37
+ #get html by searching for <html> tag
38
def get_html(file_path):
    """Scan a raw email file for a literal <html> ... </html> word span.

    Words between the markers are concatenated WITHOUT separators,
    mirroring the original feature-extraction behaviour; returns ""
    when no <html> marker word is found.
    """
    collected = []
    inside = False
    with open(file_path, 'r', encoding='iso-8859-1') as fh:
        for line in fh:
            for word in line.split():
                if word == "<html>":
                    inside = True
                if inside:
                    collected.append(word)
                if word == "</html>":
                    inside = False
    return "".join(collected)
60
+
61
def get_html_general(file_path):
    """Return HTML from the MIME part when present, else from a raw <html> scan.

    Fix: the original called get_email_html() twice (once for the check,
    once for the return), parsing the whole file a second time; the
    result is now computed once.
    """
    html = get_email_html(file_path)
    if html != "":
        return html
    return get_html(file_path)
66
def get_onclicks(file_path):
    """Count elements carrying an onClick attribute; None when there is no HTML."""
    html = get_html_general(file_path)
    if html == "":
        return None
    soup = BeautifulSoup(html, 'html.parser')
    return len(soup.find_all(attrs={'onClick': True}))
75
def check_popWindow(file_path):
    """True when a <script> body mentions 'window.open', False otherwise.

    Returns None when the email has no HTML content.  BeautifulSoup
    passes each script's string to the matcher; scripts without a string
    body make the lambda raise TypeError, which is treated as "no popup".
    """
    html = get_html_general(file_path)
    if html == "":
        return None
    soup = BeautifulSoup(html, 'html.parser')
    try:
        matches = soup.find_all('script', text=lambda body: 'window.open' in body)
        return bool(matches)
    except TypeError:
        return False
91
+
92
def check_spf(file_path):
    """Map the email's Received-SPF header to a numeric code.

    pass -> 1, neutral -> 2, softfail -> 3, anything else or missing -> 0.
    """
    with open(file_path, 'rb') as fh:
        message = email.message_from_bytes(fh.read())
    header = message.get('Received-SPF')
    if not header:
        return 0
    verdict = header.split()[0].lower()
    return {'pass': 1, 'neutral': 2, 'softfail': 3}.get(verdict, 0)
110
def check_dkim(file_path):
    """Return 1 when Authentication-Results contains a 'dkim=pass' token, else 0."""
    with open(file_path, 'rb') as fh:
        message = email.message_from_bytes(fh.read())
    auth = message.get('Authentication-Results')
    if auth is None:
        return 0
    # Whole-token match only — 'dkim=pass;' would NOT match (original behaviour).
    return 1 if 'dkim=pass' in auth.split() else 0
123
def check_dmarc(file_path):
    """Return 1 when Authentication-Results contains a 'dmarc=pass' token, else 0."""
    with open(file_path, 'rb') as fh:
        message = email.message_from_bytes(fh.read())
    auth = message.get('Authentication-Results')
    if auth is None:
        return 0
    # Whole-token match only, mirroring check_dkim.
    return 1 if 'dmarc=pass' in auth.split() else 0
136
def check_deliver_receiver(filepath):
    """Return 1 when Delivered-To equals To (both absent also counts), else 0."""
    with open(filepath, 'rb') as fh:
        message = email.message_from_bytes(fh.read())
    return 1 if message.get('Delivered-To') == message.get('To') else 0
147
def check_encript(filepath):
    """Return 1 when any Received header mentions 'version' (TLS hint), else 0."""
    with open(filepath, 'rb') as fh:
        message = email.message_from_bytes(fh.read())
    received = message.get_all('Received')
    if received is None:
        # Original relied on catching TypeError from iterating None.
        return 0
    return 1 if any('version' in header for header in received) else 0
160
def get_tags_from_html(html_content):
    """Return the names of all HTML tags in document order."""
    soup = BeautifulSoup(html_content, 'html.parser')
    return [tag.name for tag in soup.find_all()]
168
+ import ipaddress
169
+ from urllib.parse import urlparse
170
+ import urllib.request
171
+ from bs4 import BeautifulSoup
172
+ import re
173
+ import email
174
+
175
+ #get urls in html content
176
def get_urls_from_html(html_content):
    """Collect absolute http(s) hrefs from every anchor tag."""
    soup = BeautifulSoup(html_content, 'html.parser')
    found = []
    for anchor in soup.find_all('a'):
        target = anchor.get('href')
        if target and re.match('^https?://', target):
            found.append(target)
    return found
188
def get_text(file_path):
    """Return the concatenated text/plain payload of an email, newlines removed.

    NOTE(review): duplicate of the earlier get_text definition in this
    module — this later one wins at import time; consider deleting one.
    Bug fixed: the unconditional return made the HTML fallback
    unreachable; it now runs when no plain-text part exists.
    """
    with open(file_path, 'rb') as file:
        message = email.message_from_bytes(file.read())
    text_content = ""
    for part in message.walk():
        if part.get_content_type() == 'text/plain':
            text_content += part.get_payload(decode=True).decode('iso-8859-1')
    if text_content == "":
        # No plain-text part: fall back to text scraped from the HTML body.
        return get_text_from_html(get_html_general(file_path))
    return text_content.replace("\n", "")
199
def get_num_words(file_path):
    """Count whitespace-separated words in the body (HTML fallback), 0 if none."""
    plain = get_text(file_path)
    if plain != "":
        return len(plain.split())
    html = get_html_general(file_path)
    if html != "":
        return len(get_text_from_html(html).split())
    return 0
208
+
209
+ # get how many characters in the email text or html
210
def get_num_chars(file_path):
    """Count non-space characters in the body (HTML fallback), 0 if none."""
    plain = get_text(file_path)
    if plain != "":
        return len(plain.replace(" ", ""))
    html = get_html_general(file_path)
    if html != "":
        return len(get_text_from_html(html).replace(" ", ""))
    return 0
219
+
220
+ #calculate the body richness by dividing number of words with number of characters
221
def get_body_richness(filepath):
    """Words-per-character ratio of the body; 0 when there are no characters."""
    chars = get_num_chars(filepath)
    return 0 if chars == 0 else get_num_words(filepath) / chars
224
+
225
+ #get how many function words is in the content
226
def get_num_FunctionWords(file_path):
    """Count how many known phishing 'function words' occur in the body.

    Each listed word counts at most once (exact word match); returns
    None when the email has neither text nor HTML content.
    """
    function_words = ["account", "access", "bank", "credit", "click",
                      "identity", "inconvenience", "information", "limited",
                      "log", "minutes", "password", "recently", "risk",
                      "social", "security", "service", "suspended"]
    plain = get_text(file_path)
    if plain != "":
        words = plain.split()
    else:
        html = get_html_general(file_path)
        if html == "":
            return None
        words = get_text_from_html(html).split()
    return sum(1 for w in function_words if w in words)
240
+
241
+
242
def get_email_html(file_path):
    """Return the concatenated text/html payload of an email, or "" if none.

    NOTE(review): duplicate of the earlier get_email_html in this module;
    this later definition wins at import time.
    """
    with open(file_path, 'rb') as fh:
        message = email.message_from_bytes(fh.read())
    parts = [part.get_payload(decode=True).decode('iso-8859-1')
             for part in message.walk()
             if part.get_content_type() == 'text/html']
    return "".join(parts)
256
+
257
+ #get how many words in subject
258
def get_num_sbj(file_path):
    """Number of words in the Subject header."""
    return len(get_subject(file_path).split())
261
def get_subject(file_path):
    """Return the Subject header with whitespace collapsed ('' when absent)."""
    with open(file_path, 'rb') as fh:
        message = email.message_from_bytes(fh.read())
    subject = ""
    for name, value in message.items():
        if name == "Subject":
            subject = value
            break
    return re.sub(r"\s+", " ", str(subject))
276
+
277
+
278
def get_sender(file_path):
    """Return the raw From header value, or None when missing/empty."""
    with open(file_path, 'rb') as fh:
        message = email.message_from_bytes(fh.read())
    for name, value in message.items():
        if name == "From":
            return value if value != "" else None
    return None
293
+
294
+ #get how many characters in subject
295
def get_num_sbjChar(file_path):
    """Number of characters in the Subject header."""
    return len(get_subject(file_path))
298
+
299
+ #claculate the subject richness by dividing words with characters
300
def get_sbj_richness(file_path):
    """Words-per-character ratio of the subject; 0 for an empty subject."""
    chars = get_num_sbjChar(file_path)
    return 0 if chars == 0 else get_num_sbj(file_path) / chars
303
+
304
+ # get how many urls have ip address in it
305
def get_num_urls_ip(file_path):
    """Count URLs whose hostname is a literal IP address; 0 when no HTML.

    Fix: the original re-ran ``from urllib.parse import urlparse`` on
    every loop iteration even though the module already imports it at
    top level; the redundant in-loop import is removed.
    """
    content = get_html_general(file_path)
    if content == "":
        return 0
    num_ip = 0
    for url in get_urls_from_html(content):
        hostname = urlparse(url).hostname
        try:
            ipaddress.ip_address(hostname)
            num_ip += 1
        except ValueError:
            # Hostname is a regular domain (or None) — not an IP literal.
            pass
    return num_ip
322
+
323
+ # return the total amount of urls in html content
324
def get_num_urls(file_path):
    """Total number of URLs in the HTML content; None when there are none."""
    urls = get_urls_from_html(get_html_general(file_path))
    return len(urls) if urls else None
329
+
330
+ # get how many image urls in the html
331
def get_num_image_urls(file_path):
    """Count anchor tags (with an href) that wrap an <img> — clickable images."""
    soup = BeautifulSoup(get_html_general(file_path), 'html.parser')
    anchors = soup.find_all('a', href=True)
    return len([anchor for anchor in anchors if anchor.find('img')])
343
+
344
+ # get numbers of urls contain domain name
345
def get_num_domain_urls(file_path):
    """Count distinct domains among URLs matching 'https?://<domain>/'."""
    unique_domains = set()
    for url in get_urls_from_html(get_html_general(file_path)):
        match = re.search(r'https?://([^/]+)/', url)
        if match:
            unique_domains.add(match.group(1))
    return len(unique_domains)
357
+
358
+
359
+ #get how many urls contain port info
360
def get_num_url_ports(file_path):
    """Count URLs that carry an explicit port number."""
    urls = get_urls_from_html(get_html_general(file_path))
    return sum(1 for url in urls if urlparse(url).port)
372
+
373
+
374
+ #get how many characters in sender
375
def get_chars_sender(file_path):
    """Length of the sender header as a string (a missing sender is 'None')."""
    return len(str(get_sender(file_path)))
save_models/RF_Num.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2442b46ad908f4a41ce2030e10d3e59b92635396fb95c3a0d85aa74262720ef5
3
+ size 5911369
save_models/RF_extra.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:881e7727b12970a7e473e00944f6bfbf9afd732300ce48af8d714e1ceafcfb06
3
+ size 183913
save_models/SVM_finalcontent.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4541c52b73022168b124d0f115f717e55f50553fe6eea9afccd07524de0e019
3
+ size 4304747
save_models/Stack_tag.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e658df654d3620130d41ac50be3788e29e81b58c6974e1e89d06c59ad14a7f4
3
+ size 7632960
static/css/styles.css ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Page-wide defaults */
body {
  background-color: lightblue;
  margin: 0;
  color: black;
}

/* Headings and copy */
h1 { color: #111; font-family: 'Helvetica Neue', sans-serif; font-size: 50px; font-weight: bold; letter-spacing: -1px; line-height: 1; text-align: center; }

h2 { color: #111; font-family: 'Open Sans', sans-serif; font-size: 20px; font-weight: 300; line-height: 32px; margin: 0 0 30px; text-align: center; }

p { color: #685206; font-family: 'Helvetica Neue', sans-serif; font-size: 15px; line-height: 24px; margin: 0 0 24px; text-align: justify; text-justify: inter-word; }

/* Scrollable feature list */
.list {
  max-width: 400px;
  overflow-x: auto;
  list-style: none;
}

/* Two-column layout wrapper */
.container {
  display: flex;
}

/* Dashed result panels */
.box {
  border: 5px dashed black;
  width: 500px;
  margin: 50px;
  padding: 10px;
  float: left;
}

/* Uppercase label styling for feature/prediction text */
.pretty {
  font-family: "Helvetica Neue", Arial, sans-serif;
  font-size: 14px;
  line-height: 1.5;
  text-align: left;
  text-shadow: 1px 1px 1px rgba(0, 0, 0, 0.1);
  text-transform: uppercase;
  letter-spacing: 1px;
  word-spacing: 2px;
  list-style: none;
}

/* Page header band */
.header {
  padding: 10px;
  text-align: center;
  font-size: 24px;
  border: 5px dashed black;
}

/* Upload button */
.button-81 {
  background-color: #fff;
  border: 0 solid #e2e8f0;
  border-radius: 1.5rem;
  box-sizing: border-box;
  color: #0d172a;
  cursor: pointer;
  display: inline-block;
  font-family: "Basier circle",-apple-system,system-ui,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";
  font-size: 1.1rem;
  font-weight: 600;
  line-height: 1;
  padding: 1rem 1.6rem;
  text-align: center;
  text-decoration: none #0d172a solid;
  text-decoration-thickness: auto;
  transition: all .1s cubic-bezier(.4, 0, .2, 1);
  box-shadow: 0px 1px 2px rgba(166, 175, 195, 0.25);
  user-select: none;
  -webkit-user-select: none;
  touch-action: manipulation;
}

.button-81:hover {
  background-color: #1e293b;
  color: #fff;
}

/* Larger button on wider screens */
@media (min-width: 768px) {
  .button-81 {
    font-size: 1.125rem;
    padding: 1rem 2rem;
  }
}
86
+
templates/home.html ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html>
<head>
  <title>Data Visualization</title>
  <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='css/styles.css') }}">
</head>
<body>
  <!-- Upload form -->
  <div class="header">
    <h1>Welcome to Phishing Email Detection!</h1>
    <form method="POST" action="/upload" enctype="multipart/form-data">
      <input type="file" name="file" id="file-input" accept=".txt,.eml">
      <input type="submit" value="Upload" class="button-81">
    </form>
  </div>
  <div class="container">
    <!-- Extracted feature values (one-row DataFrame rendered column by column) -->
    <div class="box">
      {% if content %}
      <h2>{{ content }}</h2>
      {% endif %}
      <ul class="list">
        {% for feature in features %}
        <li><pre class="pretty">{{ feature }}: <p>{{ features[feature][0] }}</p></pre></li>
        {% endfor %}
      </ul>
    </div>
    <!-- Per-model predictions -->
    <div class="box">
      <h2>Prediction</h2>
      <ul class="pretty">
        <li>Content prediction: <p>{{ pre_content }}</p></li>
        <li>Html Tag prediction: <p>{{ pre_tag }}</p></li>
        <li>Numeric prediction: <p>{{ pre_num }}</p></li>
        <li>Extra prediction: <p>{{ pre_extra }}</p></li>
      </ul>
    </div>
  </div>
</body>
</html>
vectorizer/content_tfidf.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c78b2719d42bf0c36db85c60270770fb6decd878bf2e61cddf13bf2cdee8e19f
3
+ size 4422275
vectorizer/extra_scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3548c38d06e7e04f11df4bcdb29ad7aaeee985af2e3701f4f9d51a79cd7de041
3
+ size 776
vectorizer/html_cv.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84cf833e1bd8975474669746947e93a7bf4b9ec1046f9d8e88d98dc459c860f9
3
+ size 6814
vectorizer/num_scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1a847823b219353781e60d8672e4c2b88720d111dc0a543c3ece441f52ce06f
3
+ size 665