Upload 12 files
Browse files- .gitattributes +1 -0
- BERT_experiments.ipynb +0 -0
- FastText_experiments.ipynb +0 -0
- LSTM_experiments.ipynb +0 -0
- app.py +252 -2
- cnn_experiments.ipynb +0 -0
- doc2vec_experiments.ipynb +0 -0
- glove_experiments.ipynb +0 -0
- graph.jpg +3 -0
- old_app.py +4 -0
- tf_idf_experiments.ipynb +0 -0
- voting_experiments.ipynb +323 -0
.gitattributes
CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
GloVe/glove.6B.50d.txt filter=lfs diff=lfs merge=lfs -text
|
37 |
models/best_doc2vec_embeddings filter=lfs diff=lfs merge=lfs -text
|
|
|
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
GloVe/glove.6B.50d.txt filter=lfs diff=lfs merge=lfs -text
|
37 |
models/best_doc2vec_embeddings filter=lfs diff=lfs merge=lfs -text
|
38 |
+
graph.jpg filter=lfs diff=lfs merge=lfs -text
|
BERT_experiments.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
FastText_experiments.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
LSTM_experiments.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
app.py
CHANGED
@@ -1,4 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
st.
|
|
|
1 |
+
"""
|
2 |
+
#* make sure to run the application using this commmand:
|
3 |
+
>>> streamlit run main.py
|
4 |
+
"""
|
5 |
+
|
6 |
+
# global
|
7 |
import streamlit as st
|
8 |
+
from lime.lime_text import LimeTextExplainer
|
9 |
+
from nltk.corpus import stopwords
|
10 |
+
|
11 |
+
# local
|
12 |
+
from deployment_utils import DataPreparator, Predictor, generate_random_sample, generate_highlighted_words, extract_case_information
|
13 |
+
|
14 |
+
|
15 |
+
# instantiate `DataPreparator` & `Predictor` objects
|
16 |
+
data_preparator = DataPreparator()
|
17 |
+
predictor = Predictor()
|
18 |
+
eng_stop_words = stopwords.words("english")
|
19 |
+
|
20 |
+
st.set_page_config(
|
21 |
+
page_title="JudgerAI",
|
22 |
+
page_icon="🧊",
|
23 |
+
layout="wide")
|
24 |
+
|
25 |
+
# for custom CSS styling
|
26 |
+
with open("F:\Graduation Project\Project\src\style.css") as f:
|
27 |
+
st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
|
28 |
+
|
29 |
+
# application header
|
30 |
+
left_col, right_col = st.columns(2)
|
31 |
+
|
32 |
+
with left_col:
|
33 |
+
st.header("Summarize your Case")
|
34 |
+
with st.expander(label="Case Summarizer", expanded=True):
|
35 |
+
option = st.selectbox(
|
36 |
+
'Choose a Method for Entering your Case Facts',
|
37 |
+
('Upload a File', 'Write it Myself'))
|
38 |
+
|
39 |
+
if option == "Upload a File":
|
40 |
+
uploaded_file = st.file_uploader(
|
41 |
+
label='Upload your Case File (.txt)', type=['txt'])
|
42 |
+
if uploaded_file is not None:
|
43 |
+
content = uploaded_file.getvalue().decode("utf-8")
|
44 |
+
petitioner, respondent, case_facts = extract_case_information(
|
45 |
+
content)
|
46 |
+
|
47 |
+
col1, col2 = st.columns(2)
|
48 |
+
|
49 |
+
with col1:
|
50 |
+
st.write(
|
51 |
+
'<p class="bold-text"> Petitioner </p>', unsafe_allow_html=True)
|
52 |
+
st.info(petitioner)
|
53 |
+
with col2:
|
54 |
+
st.write(
|
55 |
+
'<p class="bold-text"> Respondent </p>', unsafe_allow_html=True)
|
56 |
+
st.info(respondent)
|
57 |
+
st.write(
|
58 |
+
'<p class="bold-text"> Case Facts </p>', unsafe_allow_html=True)
|
59 |
+
st.info(case_facts)
|
60 |
+
|
61 |
+
else:
|
62 |
+
case_facts = st.text_area(
|
63 |
+
label="Enter your Case Facts youself", height=250)
|
64 |
+
|
65 |
+
submitted = st.button(label="Summarize")
|
66 |
+
|
67 |
+
if submitted:
|
68 |
+
with st.spinner("Your Case is being Summarized..."):
|
69 |
+
summarized_case_facts = predictor.summarize_facts(case_facts)
|
70 |
+
st.write(
|
71 |
+
'<p class="bold-text"> Your Summarized Case Facts </p>', unsafe_allow_html=True)
|
72 |
+
st.success(summarized_case_facts)
|
73 |
+
|
74 |
+
summarized_case_facts_file = "petitioner: " + petitioner + "\n" + \
|
75 |
+
"respondent: " + respondent + "\n" + "facts: " + summarized_case_facts
|
76 |
+
|
77 |
+
btn = st.download_button(
|
78 |
+
label="Download",
|
79 |
+
data=summarized_case_facts_file,
|
80 |
+
file_name="summarized_case_facts.txt",
|
81 |
+
mime="file/txt"
|
82 |
+
)
|
83 |
+
|
84 |
+
with right_col:
|
85 |
+
st.header("Predict the Outcome")
|
86 |
+
|
87 |
+
# get_random_case_button = st.button(label="Get Random Sample")
|
88 |
+
|
89 |
+
# input form
|
90 |
+
with st.expander(label="Case Outcome Predictor", expanded=True):
|
91 |
+
option = st.selectbox(
|
92 |
+
'Choose a Method for Entering your Case Information',
|
93 |
+
('Upload a File', 'Write it Myself'))
|
94 |
+
|
95 |
+
if option == 'Upload a File':
|
96 |
+
uploaded_file = st.file_uploader(
|
97 |
+
label='Upload your Case File (.txt)', type=['txt'], key="prediction_case_uploader")
|
98 |
+
if uploaded_file is not None:
|
99 |
+
content = uploaded_file.getvalue().decode("utf-8")
|
100 |
+
petitioner, respondent, case_facts = extract_case_information(
|
101 |
+
content)
|
102 |
+
|
103 |
+
st.session_state["petitioner"] = petitioner.strip()
|
104 |
+
st.session_state["respondent"] = respondent.strip()
|
105 |
+
st.session_state["facts"] = case_facts.strip()
|
106 |
+
|
107 |
+
option = st.selectbox(
|
108 |
+
"Select Model",
|
109 |
+
("TF-IDF", "1D Convolutional", "GloVe", "BERT", "Doc2Vec",
|
110 |
+
"LSTM", "FastText", "Ensemble (Doc2Vec + TF-IDF)")
|
111 |
+
)
|
112 |
+
|
113 |
+
# if `Get Random Sample` btn is pressed
|
114 |
+
# if get_random_case_button:
|
115 |
+
# random_petitioner, random_respondent, random_facts, random_label = generate_random_sample()
|
116 |
+
# st.session_state["petitioner"] = random_petitioner.strip()
|
117 |
+
# st.session_state["respondent"] = random_respondent.strip()
|
118 |
+
# st.session_state["facts"] = random_facts.strip()
|
119 |
+
# st.success(f"Original label: {random_label}")
|
120 |
+
|
121 |
+
col1, col2 = st.columns(2)
|
122 |
+
|
123 |
+
with col1:
|
124 |
+
petitioner = st.text_input(
|
125 |
+
label="Petitioner", key="petitioner")
|
126 |
+
|
127 |
+
with col2:
|
128 |
+
respondent = st.text_input(
|
129 |
+
label="Respondent", key="respondent")
|
130 |
+
|
131 |
+
global facts
|
132 |
+
facts = st.text_area(label="Case Facts",
|
133 |
+
height=300, key="facts")
|
134 |
+
|
135 |
+
# remove stopwords to not highlight it
|
136 |
+
facts = " ".join([word for word in facts.split()
|
137 |
+
if word not in eng_stop_words])
|
138 |
+
|
139 |
+
# create `LimeTextExplainer` for models interpretation
|
140 |
+
class_names = [petitioner, respondent]
|
141 |
+
explainer = LimeTextExplainer(class_names=class_names)
|
142 |
+
|
143 |
+
submitted = st.button(label="Predict")
|
144 |
+
|
145 |
+
if submitted:
|
146 |
+
if petitioner and respondent and facts:
|
147 |
+
with st.spinner("Analyzing Case Facts ..."):
|
148 |
+
# get predcitions
|
149 |
+
if option == "Doc2Vec":
|
150 |
+
predictions = predictor.predict_doc2vec(facts)
|
151 |
+
output = explainer.explain_instance(
|
152 |
+
facts, predictor.predict_doc2vec)
|
153 |
+
important_words = output.as_list()
|
154 |
+
|
155 |
+
elif option == "TF-IDF":
|
156 |
+
anonymized_facts = data_preparator._anonymize_facts(
|
157 |
+
petitioner, respondent, facts)
|
158 |
+
predictions = predictor.predict_tf_idf(
|
159 |
+
anonymized_facts)
|
160 |
+
output = explainer.explain_instance(
|
161 |
+
anonymized_facts, predictor.predict_tf_idf)
|
162 |
+
important_words = output.as_list()
|
163 |
+
|
164 |
+
elif option == "1D Convolutional":
|
165 |
+
predictions = predictor.predict_cnn(facts)
|
166 |
+
output = explainer.explain_instance(
|
167 |
+
facts, predictor.predict_cnn)
|
168 |
+
important_words = output.as_list()
|
169 |
+
|
170 |
+
elif option == "GloVe":
|
171 |
+
predictions = predictor.predict_glove(facts)
|
172 |
+
output = explainer.explain_instance(
|
173 |
+
facts, predictor.predict_glove)
|
174 |
+
important_words = output.as_list()
|
175 |
+
|
176 |
+
elif option == "LSTM":
|
177 |
+
predictions = predictor.predict_lstm(facts)
|
178 |
+
output = explainer.explain_instance(
|
179 |
+
facts, predictor.predict_lstm)
|
180 |
+
important_words = output.as_list()
|
181 |
+
|
182 |
+
elif option == "BERT":
|
183 |
+
predictions = predictor.predict_bert(facts)
|
184 |
+
|
185 |
+
elif option == "FastText":
|
186 |
+
predictions = predictor.predict_fasttext(facts)
|
187 |
+
|
188 |
+
elif option == "Ensemble (Doc2Vec + TF-IDF)":
|
189 |
+
doc2vec_predictions = predictor.predict_doc2vec(facts)
|
190 |
+
tf_idf_predictions = predictor.predict_tf_idf(facts)
|
191 |
+
|
192 |
+
predictions = (doc2vec_predictions +
|
193 |
+
tf_idf_predictions) / 2
|
194 |
+
|
195 |
+
doc2vec_output = explainer.explain_instance(
|
196 |
+
facts, predictor.predict_doc2vec)
|
197 |
+
doc2vec_important_words = doc2vec_output.as_list()
|
198 |
+
|
199 |
+
tf_idf_output = explainer.explain_instance(
|
200 |
+
facts, predictor.predict_tf_idf)
|
201 |
+
tf_idf_important_words = tf_idf_output.as_list()
|
202 |
+
|
203 |
+
important_words = doc2vec_important_words + tf_idf_important_words
|
204 |
+
|
205 |
+
# displaying predictions
|
206 |
+
col1, col2 = st.columns(2)
|
207 |
+
|
208 |
+
with col1:
|
209 |
+
st.write("Percentage of petitioner winning:")
|
210 |
+
st.warning(f"{predictions[0, 0] * 100:.3f}%")
|
211 |
+
|
212 |
+
with col2:
|
213 |
+
st.write("Percentage of respondent winning:")
|
214 |
+
st.info(f"{predictions[0, 1] * 100:.3f}%")
|
215 |
+
|
216 |
+
st.write("Winning party:")
|
217 |
+
if predictions[0, 0] > predictions[0, 1]:
|
218 |
+
st.success(petitioner)
|
219 |
+
else:
|
220 |
+
st.success(respondent)
|
221 |
+
|
222 |
+
# displaying highlighted words
|
223 |
+
st.write(
|
224 |
+
'<p class="bold-text"> Top Words for Model\'s Decision: </p>', unsafe_allow_html=True)
|
225 |
+
|
226 |
+
if option not in ["BERT", "FastText"]:
|
227 |
+
petitioner_words = [word for word,
|
228 |
+
score in important_words if score < 0]
|
229 |
+
respondent_words = [word for word,
|
230 |
+
score in important_words if score > 0]
|
231 |
+
|
232 |
+
for name in petitioner.split(" "):
|
233 |
+
if name in petitioner_words:
|
234 |
+
petitioner_words.remove(name)
|
235 |
+
elif name in respondent_words:
|
236 |
+
respondent_words.remove(name)
|
237 |
+
|
238 |
+
for name in respondent.split(" "):
|
239 |
+
if name in petitioner_words:
|
240 |
+
petitioner_words.remove(name)
|
241 |
+
elif name in respondent_words:
|
242 |
+
respondent_words.remove(name)
|
243 |
+
|
244 |
+
rendered_text = generate_highlighted_words(
|
245 |
+
facts, petitioner_words, respondent_words)
|
246 |
+
|
247 |
+
st.write(rendered_text, unsafe_allow_html=True)
|
248 |
+
|
249 |
+
else:
|
250 |
+
st.warning(
|
251 |
+
"Sadly, this feature is not supported in BERT & FastText :(")
|
252 |
|
253 |
+
else:
|
254 |
+
st.error("Please, fill in all fields!")
|
cnn_experiments.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
doc2vec_experiments.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
glove_experiments.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
graph.jpg
ADDED
![]() |
Git LFS Details
|
old_app.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
x = st.slider('Select a value')
|
4 |
+
st.write(x, 'squared is', x * x)
|
tf_idf_experiments.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
voting_experiments.ipynb
ADDED
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import numpy as np\n",
|
10 |
+
"import pandas as pd\n",
|
11 |
+
"import seaborn as sn\n",
|
12 |
+
"\n",
|
13 |
+
"from tensorflow import keras\n",
|
14 |
+
"from gensim.models.doc2vec import Doc2Vec\n",
|
15 |
+
"from sklearn.metrics import accuracy_score\n",
|
16 |
+
"from sklearn.metrics import confusion_matrix\n",
|
17 |
+
"from sklearn.metrics import classification_report\n",
|
18 |
+
"\n",
|
19 |
+
"from src.preprocessing import Preprocessor\n",
|
20 |
+
"from src.utils import read_data"
|
21 |
+
]
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"cell_type": "code",
|
25 |
+
"execution_count": 6,
|
26 |
+
"metadata": {},
|
27 |
+
"outputs": [],
|
28 |
+
"source": [
|
29 |
+
"doc2vec_model_embeddings = Doc2Vec.load(\n",
|
30 |
+
" \"models/best_doc2vec_embeddings\")\n",
|
31 |
+
"doc2vec_model = keras.models.load_model(\n",
|
32 |
+
" \"models/best_doc2vec_model.h5\")\n",
|
33 |
+
"tfidf_model = keras.models.load_model(\"models/best_tfidf_model.h5\")"
|
34 |
+
]
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"cell_type": "code",
|
38 |
+
"execution_count": 2,
|
39 |
+
"metadata": {},
|
40 |
+
"outputs": [],
|
41 |
+
"source": [
|
42 |
+
"X_train, X_test, y_train, y_test = read_data()"
|
43 |
+
]
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"cell_type": "code",
|
47 |
+
"execution_count": 10,
|
48 |
+
"metadata": {},
|
49 |
+
"outputs": [],
|
50 |
+
"source": [
|
51 |
+
"preprocessor = Preprocessor()"
|
52 |
+
]
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"attachments": {},
|
56 |
+
"cell_type": "markdown",
|
57 |
+
"metadata": {},
|
58 |
+
"source": [
|
59 |
+
"### Best combination of TF-IDF model"
|
60 |
+
]
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"cell_type": "code",
|
64 |
+
"execution_count": 11,
|
65 |
+
"metadata": {},
|
66 |
+
"outputs": [],
|
67 |
+
"source": [
|
68 |
+
"first_party_names_3 = X_train[\"first_party\"]\n",
|
69 |
+
"second_party_names_3 = X_train[\"second_party\"]\n",
|
70 |
+
"facts_3 = X_train[\"Facts\"]"
|
71 |
+
]
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"cell_type": "code",
|
75 |
+
"execution_count": 12,
|
76 |
+
"metadata": {},
|
77 |
+
"outputs": [],
|
78 |
+
"source": [
|
79 |
+
"test_first_party_names_3 = X_test[\"first_party\"]\n",
|
80 |
+
"test_second_party_names_3 = X_test[\"second_party\"]\n",
|
81 |
+
"test_facts_3 = X_test[\"Facts\"]"
|
82 |
+
]
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"cell_type": "code",
|
86 |
+
"execution_count": 13,
|
87 |
+
"metadata": {},
|
88 |
+
"outputs": [],
|
89 |
+
"source": [
|
90 |
+
"anonymized_facts_3 = preprocessor.anonymize_data(\n",
|
91 |
+
" first_party_names_3, second_party_names_3, facts_3)\n",
|
92 |
+
"test_anonymized_facts_3 = preprocessor.anonymize_data(\n",
|
93 |
+
" test_first_party_names_3, test_second_party_names_3, test_facts_3)"
|
94 |
+
]
|
95 |
+
},
|
96 |
+
{
|
97 |
+
"cell_type": "code",
|
98 |
+
"execution_count": 14,
|
99 |
+
"metadata": {},
|
100 |
+
"outputs": [],
|
101 |
+
"source": [
|
102 |
+
"text_vectorizer_3, X_train_vectors_3 = preprocessor.convert_text_to_vectors_tf_idf(\n",
|
103 |
+
" anonymized_facts_3)"
|
104 |
+
]
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"cell_type": "code",
|
108 |
+
"execution_count": 15,
|
109 |
+
"metadata": {},
|
110 |
+
"outputs": [],
|
111 |
+
"source": [
|
112 |
+
"X_test_vectors_3 = preprocessor.convert_text_to_vectors_tf_idf(\n",
|
113 |
+
" test_anonymized_facts_3, train=False, text_vectorizer=text_vectorizer_3)"
|
114 |
+
]
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"cell_type": "code",
|
118 |
+
"execution_count": 16,
|
119 |
+
"metadata": {},
|
120 |
+
"outputs": [
|
121 |
+
{
|
122 |
+
"name": "stdout",
|
123 |
+
"output_type": "stream",
|
124 |
+
"text": [
|
125 |
+
"22/22 [==============================] - 1s 9ms/step\n"
|
126 |
+
]
|
127 |
+
}
|
128 |
+
],
|
129 |
+
"source": [
|
130 |
+
"y_preds_tfidf = tfidf_model.predict(X_test_vectors_3)"
|
131 |
+
]
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"cell_type": "code",
|
135 |
+
"execution_count": 39,
|
136 |
+
"metadata": {},
|
137 |
+
"outputs": [
|
138 |
+
{
|
139 |
+
"name": "stdout",
|
140 |
+
"output_type": "stream",
|
141 |
+
"text": [
|
142 |
+
"TF-IDF accuracy: 0.973\n"
|
143 |
+
]
|
144 |
+
}
|
145 |
+
],
|
146 |
+
"source": [
|
147 |
+
"preds = np.where(y_preds_tfidf > 0.5, 1, 0)\n",
|
148 |
+
"accuracy = accuracy_score(y_test, preds)\n",
|
149 |
+
"print(\"TF-IDF accuracy: {:.3f}\".format(accuracy))"
|
150 |
+
]
|
151 |
+
},
|
152 |
+
{
|
153 |
+
"attachments": {},
|
154 |
+
"cell_type": "markdown",
|
155 |
+
"metadata": {},
|
156 |
+
"source": [
|
157 |
+
"### Best Combination of Doc2Vec Model"
|
158 |
+
]
|
159 |
+
},
|
160 |
+
{
|
161 |
+
"cell_type": "code",
|
162 |
+
"execution_count": 40,
|
163 |
+
"metadata": {},
|
164 |
+
"outputs": [],
|
165 |
+
"source": [
|
166 |
+
"X_test_processed = preprocessor.preprocess_data(X_test[\"Facts\"])"
|
167 |
+
]
|
168 |
+
},
|
169 |
+
{
|
170 |
+
"cell_type": "code",
|
171 |
+
"execution_count": 41,
|
172 |
+
"metadata": {},
|
173 |
+
"outputs": [],
|
174 |
+
"source": [
|
175 |
+
"X_test_vectors_doc2vec = preprocessor.convert_text_to_vectors_doc2vec(\n",
|
176 |
+
" X_test_processed, train=False, embeddings_doc2vec=doc2vec_model_embeddings)"
|
177 |
+
]
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"cell_type": "code",
|
181 |
+
"execution_count": 42,
|
182 |
+
"metadata": {},
|
183 |
+
"outputs": [
|
184 |
+
{
|
185 |
+
"name": "stdout",
|
186 |
+
"output_type": "stream",
|
187 |
+
"text": [
|
188 |
+
"22/22 [==============================] - 0s 2ms/step\n"
|
189 |
+
]
|
190 |
+
}
|
191 |
+
],
|
192 |
+
"source": [
|
193 |
+
"y_preds_doc2vec = doc2vec_model.predict(X_test_vectors_doc2vec)"
|
194 |
+
]
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"cell_type": "code",
|
198 |
+
"execution_count": 46,
|
199 |
+
"metadata": {},
|
200 |
+
"outputs": [
|
201 |
+
{
|
202 |
+
"name": "stdout",
|
203 |
+
"output_type": "stream",
|
204 |
+
"text": [
|
205 |
+
"Doc2Vec accuracy: 0.944\n"
|
206 |
+
]
|
207 |
+
}
|
208 |
+
],
|
209 |
+
"source": [
|
210 |
+
"preds_2 = np.where(y_preds_doc2vec > 0.5, 1, 0)\n",
|
211 |
+
"accuracy = accuracy_score(y_test, preds_2)\n",
|
212 |
+
"print(\"Doc2Vec accuracy: {:.3f}\".format(accuracy))"
|
213 |
+
]
|
214 |
+
},
|
215 |
+
{
|
216 |
+
"attachments": {},
|
217 |
+
"cell_type": "markdown",
|
218 |
+
"metadata": {},
|
219 |
+
"source": [
|
220 |
+
"### Combining both accuracies"
|
221 |
+
]
|
222 |
+
},
|
223 |
+
{
|
224 |
+
"cell_type": "code",
|
225 |
+
"execution_count": 47,
|
226 |
+
"metadata": {},
|
227 |
+
"outputs": [
|
228 |
+
{
|
229 |
+
"name": "stdout",
|
230 |
+
"output_type": "stream",
|
231 |
+
"text": [
|
232 |
+
"Voting accuracy: 0.986\n"
|
233 |
+
]
|
234 |
+
}
|
235 |
+
],
|
236 |
+
"source": [
|
237 |
+
"voting_predcitions = (y_preds_doc2vec + y_preds_tfidf) / 2\n",
|
238 |
+
"voting_predcitions = np.where(voting_predcitions > 0.5, 1, 0)\n",
|
239 |
+
"accuracy = accuracy_score(y_test, voting_predcitions)\n",
|
240 |
+
"print(\"Voting accuracy: {:.3f}\".format(accuracy))"
|
241 |
+
]
|
242 |
+
},
|
243 |
+
{
|
244 |
+
"cell_type": "code",
|
245 |
+
"execution_count": 51,
|
246 |
+
"metadata": {},
|
247 |
+
"outputs": [
|
248 |
+
{
|
249 |
+
"data": {
|
250 |
+
"text/plain": [
|
251 |
+
"<AxesSubplot:>"
|
252 |
+
]
|
253 |
+
},
|
254 |
+
"execution_count": 51,
|
255 |
+
"metadata": {},
|
256 |
+
"output_type": "execute_result"
|
257 |
+
},
|
258 |
+
{
|
259 |
+
"data": {
|
260 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnAAAAHTCAYAAACncyV+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAzK0lEQVR4nO3deXxU1f3/8fcQmMmEyJZNAhS+QSUQcIiJoAUqKIgoKpu1KCICgiWABSwaFsvayObCJqCi1qj4g7gV/brQX78oSgVDk/wA6RdCwUAgZISwZZIhzPz+oI5OwTLBCcNhXk8f9497zr33nOHR6ofP555zLV6v1ysAAAAYo1aoJwAAAIDqIYADAAAwDAEcAACAYQjgAAAADEMABwAAYBgCOAAAAMMQwAEAABiGAA4AAMAwBHAAAACGqR3qCXzvlHN3qKcAoIbYE7uEegoAakiVe3/Ixq6J2KFObFLQn1kTLpkADgAAoFo8p0M9g5ChhAoAAGAYMnAAAMBMXk+oZxAyZOAAAAAMQwYOAACYyRO+GTgCOAAAYCQvJVQAAACYggwcAAAwUxiXUMnAAQAAGIYMHAAAMFMYvwNHAAcAAMzElxgAAABgCjJwAADATGFcQiUDBwAAYBgycAAAwExhvI0IARwAADASX2IAAACAMQjgAACAmTye4B8XaMSIEXriiSd859u3b9c999wjh8Oh/v37a+vWrX7Xr127Vt27d5fD4VBGRoYOHz5crfEI4AAAAH6GDz74QOvXr/edl5eXa8SIEUpPT9fbb7+t1NRUjRw5UuXl5ZKkgoICTZ48WaNHj9Zbb72lY8eOKTMzs1pjEsABAAAzeT3BP6qprKxMc+fOVbt27XxtH374oWw2myZOnKiWLVtq8uTJqlu3rj766CNJUnZ2tnr16qU+ffooOTlZc+fO1fr161VUVBTwuARwAADATJ7TwT+qac6cObr77rt11VVX+dry8/OVlpYmi8UiSbJYLLruuuuUl5fn609PT/dd37hxYyUmJio/Pz/gcQngAAAALsDGjRv19ddfa9SoUX7tpaWlio+P92uLiYnRwYMHJUmHDh36j/2BYBsRAABgphBuI1JZWak//OEPevLJJxUZGenX53K5ZLVa/dqsVqvcbrckqaKi4j/2B4IMHAAAQDUtXrxYbdu2VZcuXc7qs9lsZwVjbrfbF+j9VL/dbg94fDJwAADATCH8EsMHH3wgp9Op1NRUSfIFZB9//LF69+4tp9Ppd73T6fSVTRMSEs7ZHxcXF/D4BHAAAMBMISyhvvbaa6qqqvKdz58/X5L02GOPafPmzXrhhRfk9XplsVjk9Xq1ZcsWPfLII5Ikh8Oh3Nxc9evXT5J04MABHThwQA6HI+DxCeAAAACqqUmTJn7ndevWlSQ1b95cMTExWrBggWbPnq3f/OY3WrVqlVwul3r16iVJGjhwoB544AG1b99e7dq10+zZs9W1a1c1a9Ys4PF5Bw4AAJjpEvoSw49FR0dr+fLlvixbfn6+VqxYoaioKElSamqqZsyYoSVLlmjgwIGqX7++srKyqjWGxev1eoMy25/plHN3qKcAoIbYE89+yRfA5aHKvT9kY1cWfBz0Z9qu7Rn0Z9YESqgAAMBIXm/1N969XBDAAQAAM4VwEUOo8Q4cAACAYcjAAQAAM4VwH7hQIwMHAABgGDJwAADATGH8DhwBHAAAMJMnfFehUkIFAAAwDBk4AABgJkqoAAAAhmEVKgAAAExBBg4AAJgpjEuoZOAAAAAMQwYOAACYKYzfgSOAAwAAZgrjAI4SKgAAgGHIwAEAACN5vXyJAQAAAIYgAwcAAMwUxu/AEcABAAAzsQ8cAAAATEEGDgAAmCmMS6hk4AAAAAxDBg4AAJgpjN+BI4ADAABmooQKAAAAU5CBAwAAZgrjEioZOAAAAMOQgQMAAGYK43fgCOAAAICZwjiAo4QKAABgGDJwAADATCxiAAAAgCnIwAEAADOF8TtwBHAAAMBMlFABAABgCjJwAADATGFcQiUDBwAAYBgCOAAAYCavJ/hHNezdu1fDhg1TamqqunbtqhdffNHXN2vWLLVq1crvyM7O9vWvXbtW3bt3l8PhUEZGhg4fPlytsSmhAgAAM4WwhOrxeDRixAi1a9dO77zzjvbu3avx48crISFBd955pwoLCzVhwgT17dvXd090dLQkqaCgQJMnT9b06dOVnJys2bNnKzMzU8uXLw94fDJwAAAA1eR0OtW6dWtNmzZNLVq00E033aQbb7xRubm5kqTCwkK1adNGcXFxvsNut0uSsrOz1atXL/Xp00fJycmaO3eu1q9fr6KiooDHJ4ADAABm8niCfwQoPj5ezz77rKKjo+X1epWbm6vNmzerQ4cOOnHihEpKStSiRYtz3pufn6/09HTfeePGjZWYmKj8/PyAx6eECgAA8DPcfPPNKi4uVrdu3dSzZ09t3bpVFotFy5Yt02effaYGDRrooYce8pVTDx06pPj4eL9nxMTE6ODBgwGPSQAHAADM5PWGegaSpIULF8rpdGratGnKyspSSkqKLBaLkpKSNGjQIG3evFlTp05VdHS0evTooYqKClmtVr9nWK1Wud3ugMckgAMAAGa6RPaBa9eunSSpsrJSjz32mLZs2aJu3bqpQYMGkqTk5GTt2bNHb775pnr06CGbzXZWsOZ2u33vyAWCd+AAAACqyel0at26dX5tV111lU6dOqUTJ074grfvJSUlqaSkRJKUkJAgp9N51vPi4uICHp8ADgAAmCmEixj27dun0aNH+4IySdq6dasaNWqk1157TUOGDPG7fseOHUpKSpIkORwO32pVSTpw4IAOHDggh8MR8PgEcAAAANXUrl07paSkaNKkSdq1a5fWr1+vefPm6ZFHHlG3bt20efNmvfTSS/r222/1xhtv6N1339XQoUMlSQMHDtR7772n1atXa8eOHZo4caK6du2qZs2aBTy+xeu9NN4APOXcHeopAKgh9sQuoZ4CgBpS5d4fsrFd2ZOD/kz7oNkBX1tSUqKZM2dq48aNstvtGjRokEaOHCmLxaJ169Zp4cKF2rNnj5o0aaJx48bp1ltv9d379ttva+HChTp69Kg6deqkmTNnqmHDhgGPTQAHoMYRwAGXr5AGcH/KDPoz7YOzgv7MmkAJFQAAwDBsIwIAAMx0aRQRQ4IADgAAmOkS2QcuFCihAgAAGIYMHAAAMBMZOAAAAJiCDBwAADCTN3wzcARwAADASF5P+K5CpYQKAABgGDJwAADATCxiAAAAgCnIwAEAADOxiAEAAMAwLGIAAACAKcjAAQAAM7GIAQAAAKYgAwcAAMwUxhk4AjgAAGAmL4sYAAAAYAgCOJzXbx97UpNnLfjJ/nc++ER3DnxY13fvq4EP/05bCrYFbezKSremZj2jG3sOUNe77tMrb+b49edv/Ub3jxyv67v3Ve/fDNea9z8K2tgAAmez2bRi+Xw5D21X0d4tGve7kaGeEsKBxxP8wxAEcPiPPlz3P/p84+af7N/wt681e8FSjRwyUDmvLNEvr79Oox57UodKvwvK+AuWvKhtO3bqpYVPacqEDD2/8nV98tfPJUnO7w7rt489qetTr9Walxdr1PBBynrmea3/clNQxgYQuDlPTVFamkM9bv21Ro+dpKlTxqlfvztCPS3gskUAh5909NhxLVjyktq2vuYnr3n3w091V69b1LvnzfpF00SNGTFYsY0a6rON1QuilryUfVaWr9xVoZw/f6wnHn1EbVpdpe43ddLQ++/RGzl/liT95bONimnUUL97ZIiaN2ui27t31V29btGHn/y1+j8WwAWLirJr2NCBGj/+Sf09b6vee+8jzV/wvDJ+OyTUU8PlzuMN/mEIFjHgJ81b/ILu7HmLSp0/nU0bev89qhtlP6v9+ImTkiS3260FS1fqg38FVZ06pmnSuN+qfr0rzjv+P3btVtXpKqW2a+1rS702RSteXSWPx6PON6Qr+eqks8c+WX7eZwMIHse1KapTp46+3Pi1r+2LLzYp84kxslgs8obxi+aoYWH8Ka0LzsAdOXJEJSUlOnbsWDDng0vEV7l5ys3bqkceGvgfr2vT6io1b9bEd77hb19rT9F+dUxrL0l6bvmr2vrN/+r5+TO0ctFTOnHypMZPmR3QHJzOw2pQv77q1Knja4tp1ECVbrfKjh5Tk8YJcrT9Ibj77kiZ/nvdet3wr7EBXBxXNo6X03lYp06d8rWVHCqV3W5XTEzDEM4MuHxVKwP3ySefKDs7WwUFBaqsrPS1R0ZGqm3btnrwwQfVvXv3oE8SF1dlpVvT5y7SlAkZirTZAr7v233Fmjz7ad1xaze1aXWVXBUVeiPnfb310kJd0/K/JElZU3+vzrffq/8t/KeOHz+pRx6bKkk6dapK8nr1yf9skCQtmz9TrspKWX8UvEnynbt/9B8KSaqorNS4SbMU26ih7unT64J/O4Dqi4qyq7LS7df2/bmtGv8OAarNoJJnsAUcwL388stavHixhg8frtGjRysmJkZWq1Vut1tOp1Nff/21nnjiCT366KN64IEHanLOqGFLV76ulOSr1aljWsD37Pl2n4b/bpKaNWms6U88KknaV3xQp05V6f6R4/2u9Xg82vvtfnX55fXKeWWJJCl79Xs6VPqdxo8aKkmKj4uR8/CRswK178/tkZG+tvJyl8Y8MV17ivbrtefn+/UBqHkVFZWy2ax+bd+fl5e7QjEl4LIXcAC3cuVKzZkz55wZtpYtW6pjx45q1aqVZs6cSQBnuI/+sl7O747o+u59JUmn3GeCpk/+Z4M2r3vnrOt37d6rYY9mqmnilVq2YIYva3e66rQk6bWl8xX1b+/JxTRsoEibTb9omihJql/vCp08We47l84EcWVHj6qq6rRq146QJH333RFF2my6IrquJOnEyZN6ZMKT+nZfsVYufMqvnAvg4ijef1CxsY0UERGh06fP/P/+yoR4lZe7VFZ2NMSzw+XMa9C2H8EWcABXUVGhpk2b/sdrEhISdPz48Z89KYTWy4vnqOpfwZckPb10pST5smM/Vuo8rBHjJqt500QtWzDTL1Br1qSxIiJqqezYMSVf01LSmffUnsx6Ro+PHam6daP+4zySr05S7YjaKtj2ja5ztJUkbSnYpratr1atWrXk8Xj0u0mztK/4gF5ZMldJzZv97N8OoPry8rfq1KlTuqHjdfriyzPbDnXq1EFff53HAgbULEqo59ejRw898cQTmjJlitq3b6/atX+41ePxKC8vT3/4wx/Us2fPGpkoLp7EKxP8zr9fZfqLpok6ffq0jpQdVf16V6hOnTqav/gFnfZ4NCPzdyp3uVTuOlMuibLbVbdulPrfeZtmzl+iP0wcq5iG9TV30QsqPnhITRr7j5ExbNBZ87BHRuquXt01Y95izZw8TodKv9Mrb+Zo5qQzJdm3136sTVsKtGjOH1Qvuq6c3x2WJNWpUyegVa4AgsPlqtCfXlujJUue0vDh45XY5EqNHzdSwx4ef/6bAVwQizfAvx653W7NmTNHa9as0enTp9WgQQPfO3BlZWWqXbu27r77bmVmZiryAt5BOuXcXe17cHF8vz/b7CkTtP9AiXoOGKKVi+bo+tR2uv6Wvqr40YKW7/126P3KGDZIrooKzV/8oj76y2eqqqpSWvt2mjTut2qaeGVAY7sqKjRz3mJ9uv4LXVG3rh66r78euPdMaXfk+Cn64qvcs+5JT22nVxbP/Rm/GMFmT+wS6imghtntkVqy+Cn163u7jh49pgVPL9PCRS+Gelq4CKrc+0M29slZZ//l/+eqOyU76M+sCQEHcN9zuVzasWOHSktL5XK5ZLPZlJCQoNatW19Q4PY9Ajjg8kUAB1y+COBCo9ob+drtdqWmptbEXAAAAALHO3AAAACGCeNVqHwLFQAAwDBk4AAAgJnCuIRKBg4AAMAwZOAAAICZvOH7DhwBHAAAMBMlVAAAAFTH3r17NWzYMKWmpqpr16568cUfNq8uKirSkCFD1L59e91+++3asGGD371ffvmlevfuLYfDocGDB6uoqKhaYxPAAQAAI3k9nqAfgfJ4PBoxYoQaNmyod955R9OnT9fzzz+vP//5z/J6vcrIyFBsbKxycnJ09913a/To0SouLpYkFRcXKyMjQ/369dOaNWvUqFEjjRo1qlrfDqaECgAAUE1Op1OtW7fWtGnTFB0drRYtWujGG29Ubm6uYmNjVVRUpFWrVikqKkotW7bUxo0blZOTozFjxmj16tVq27athg4dKknKyspSp06dtGnTJnXs2DGg8cnAAQAAM3m8wT8CFB8fr2effVbR0dHyer3Kzc3V5s2b1aFDB+Xn56tNmzaKioryXZ+Wlqa8vDxJUn5+vtLT0319drtdKSkpvv5AEMABAAAzhTCA+7Gbb75Z9913n1JTU9WzZ0+VlpYqPj7e75qYmBgdPHhQks7bHwgCOAAAgJ9h4cKFWrZsmb755htlZWXJ5XLJarX6XWO1WuV2uyXpvP2B4B04AABgpktkH7h27dpJkiorK/XYY4+pf//+crlcfte43W5FRkZKkmw221nBmtvtVr169QIekwwcAABANTmdTq1bt86v7aqrrtKpU6cUFxcnp9N51vXfl00TEhLO2R8XFxfw+ARwAADATCF8B27fvn0aPXq0SkpKfG1bt25Vo0aNlJaWpm3btqmiosLXl5ubK4fDIUlyOBzKzc319blcLm3fvt3XHwgCOAAAYCSvxxv0I1Dt2rVTSkqKJk2apF27dmn9+vWaN2+eHnnkEXXo0EGNGzdWZmamdu7cqRUrVqigoEADBgyQJPXv319btmzRihUrtHPnTmVmZqpp06YBbyEiEcABAABUW0REhJYuXSq73a57771XkydP1gMPPKDBgwf7+kpLS9WvXz+9//77WrJkiRITEyVJTZs21aJFi5STk6MBAwaorKxMS5YskcViCXh8i7c62/7WoFPO3aGeAoAaYk/sEuopAKghVe79IRv7+NjeQX/mFQvXBv2ZNYFVqAAAwEzV+PTV5YYSKgAAgGHIwAEAADNd4JcTLgdk4AAAAAxDBg4AAJgpjDNwBHAAAMBIl8hGGiFBCRUAAMAwZOAAAICZwriESgYOAADAMGTgAACAmcI4A0cABwAAjFSdj89fbiihAgAAGIYMHAAAMBMZOAAAAJiCDBwAADCTJ9QTCB0COAAAYCQWMQAAAMAYZOAAAICZyMABAADAFGTgAACAmVjEAAAAYBYWMQAAAMAYZOAAAICZwriESgYOAADAMGTgAACAkcL5HTgCOAAAYCZKqAAAADAFGTgAAGAkLxk4AAAAmIIMHAAAMFMYZ+AI4AAAgJEooQIAAMAYZOAAAICZyMABAADAFGTgAACAkcL5HTgCOAAAYKRwDuAooQIAABiGDBwAADASGTgAAABUS0lJicaOHasOHTqoS5cuysrKUmVlpSRp1qxZatWqld+RnZ3tu3ft2rXq3r27HA6HMjIydPjw4WqNTQYOAACYyWsJ3dBer8aOHat69erp9ddf19GjRzVp0iTVqlVLjz/+uAoLCzVhwgT17dvXd090dLQkqaCgQJMnT9b06dOVnJys2bNnKzMzU8uXLw94fDJwAADASF5P8I9A7d69W3l5ecrKytLVV1+t9PR0jR07VmvXrpUkFRYWqk2bNoqLi/MddrtdkpSdna1evXqpT58+Sk5O1ty5c7V+/XoVFRUFPD4BHAAAQDXFxcXpxRdfVGxsrF/7iRMndOLECZWUlKhFixbnvDc/P1/p6em+88aNGysxMVH5+fkBj08ABwAAjOT1WIJ+BKpevXrq0qWL79zj8Sg7O1s33HCDCgsLZbFYtGzZMv3qV7/SXXfdpXfeecd37aFDhxQfH+/3vJiYGB08eDDg8XkHDgAAGOlSWoU6b948bd++XWvWrNG2bdtksViUlJSkQYMGafPmzZo6daqio6PVo0cPVVRUyGq1+t1vtVrldrsDHo8ADgAA4GeYN2+eXn31VT3zzDO65pprdPXVV6tbt25q0KCBJCk5OVl79uzRm2++qR49eshms50VrLndbt87coGghAoAAIzk9VqCflTXzJkz9fLLL2vevHnq2bOnJMlisfiCt+8lJSWppKREkpSQkCCn0+nX73Q6FRcXF/C4BHAAAAAXYPHixVq1apWefvpp3XHHHb725557TkOGDPG7dseOHUpKSpIkORwO5ebm+voOHDigAwcOyOFwBDw2JVQAAGCkUL4DV1hYqKVLl2rEiBFKS0tTaWmpr69bt25asWKFXnrpJfXo0UMbNmzQu+++qz/96U+SpIEDB+qBBx5Q+/bt1a5dO82ePVtdu3ZVs2bNAh7f4vV6vUH/VRfglHN3qKcAoIbYE7uc/yIARqpy7w/Z2EXX3xL0Zzbb/JeArluxYoUWLFhwzr5//OMfWrdunRYuXKg9e/aoSZMmGjdunG699VbfNW+//bYWLlyoo0ePqlOnTpo5c6YaNmwY8DwJ4ADUOAI44PIVrgFcqFFCBQAARro0UlChwSIGAAAAw5CBAwAARqrOlxMuNwRwAADASOEcwFFCBQAAMAwZOAAAYCQWMQAAAMAYZOAAAICRwvkdOAI4AABgpAv5+PzlghIqAACAYcjAAQAAI4XyY/ahRgYOAADAMGTgAACAkTxh/A4cARwAADASixgAAABgDDJwAADASOG8DxwZOAAAAMOQgQMAAEYK52+hEsABAAAjUUIFAACAMcjAAQAAI4XzPnBk4AAAAAxDBg4AABgpnDfyJYADAABGCudVqJRQAQAADEMGDgAAGIlFDAAAADAGGTgAAGAkFjEAAAAYhkUMAAAAMAYZOAAAYCQWMQAAAMAYl0wGrsEvbg71FADUkIPdrgr1FABchljEAAAAYBhKqAAAADAGGTgAAGCkMN5FhAAOAACYiRIqAAAAjEEABwAAjOT1WoJ+VEdJSYnGjh2rDh06qEuXLsrKylJlZaUkqaioSEOGDFH79u11++23a8OGDX73fvnll+rdu7ccDocGDx6soqKiao1NAAcAAFBNXq9XY8eOlcvl0uuvv65nnnlGf/3rX/Xss8/K6/UqIyNDsbGxysnJ0d13363Ro0eruLhYklRcXKyMjAz169dPa9asUaNGjTRq1Ch5q/FtMN6BAwAARvKEcOzdu3crLy9PX3zxhWJjYyVJY8eO1Zw5c/SrX/1KRUVFWrVqlaKiotSyZUtt3LhROTk5GjNmjFavXq22bdtq6NChkqSsrCx16tRJmzZtUseOHQManwwcAAAwkleWoB+BiouL04svvugL3r534sQJ5efnq02bNoqKivK1p6WlKS8vT5KUn5+v9PR0X5/dbldKSoqvPxAEcAAAANVUr149denSxXfu8XiUnZ2tG264QaWlpYqPj/e7PiYmRgcPHpSk8/YHggAOAAAYyeMN/nGh5s2bp+3bt2vcuHFyuVyyWq1+/VarVW63W5LO2x8IAjgAAICfYd68eXr11Vc1b948XXPNNbLZbGcFY263W5GRkZL0k/12uz3gMQngAACAkTyyBP2orpkzZ+rll1/WvHnz1LNnT0lSQkKCnE6n33VOp9NXNv2p/ri4uIDHJYADAABGCuUiBklavHixVq1apaefflp33HGHr93hcGjbtm2qqKjwteXm5srhcPj6c3NzfX0ul0vbt2/39QeCAA4AAKCaCgsLtXTpUj388MNKS0tTaWmp7+jQoYMaN26szMxM7dy5UytWrFBBQYEGDBggSerfv7+2bNmiFStWaOfOncrMzFTTpk0D3kJEIoADAACG8tTAEai//OUvOn36tJ5//nl17tzZ74iIiNDSpUtVWlqqfv366f3339eSJUuUmJgoSWratKkWLVqknJwcDRgwQGVlZVqyZIkslsAzgBZvdbb9rUF1o1qEegoAasjeLs1DPQUANST24/UhG/vThHuD/sweJW8F/Zk1gS8xAAAAI1X3nbXLCQEcAAAwUig/pRVqvAMHAABgGDJwAADASGTgAAAAYAwycAAAwEgsYgAAADCMJ3zjN0qoAAAApiEDBwAAjHQhH5+/XJCBAwAAMAwZOAAAYKRL4lugIUIABwAAjMQ+cAAAADAGGTgAAGAkj4VFDAAAADAEGTgAAGAkFjEAAAAYhkUMAAAAMAYZOAAAYCS+hQoAAABjkIEDAABGCudvoRLAAQAAI4XzKlRKqAAAAIYhAwcAAIzEIgYAAAAYgwwcAAAwUjhv5EsABwAAjMQiBgAAABiDDBwAADASixgAAABgDDJwAADASCxiAAAAMEw4B3CUUAEAAAxDBg4AABjJG8aLGAjgAACAkSihAgAAwBhk4AAAgJHIwAEAAMAYBHAAAMBI3ho4LoTb7Vbv3r311Vdf+dpmzZqlVq1a+R3Z2dm+/rVr16p79+5yOBzKyMjQ4cOHqzUmJVQAAGCkS+FTWpWVlZowYYJ27tzp115YWKgJEyaob9++vrbo6GhJUkFBgSZPnqzp06crOTlZs2fPVmZmppYvXx7wuARwAAAAF2DXrl2aMGGCvN6zc3eFhYUaNmyY4uLizurLzs5Wr1691KdPH0nS3Llz1a1bNxUVFalZs2YBjU0JFQAAGMlTA0d1bNq0SR07dtRbb73l137ixAmVlJSoRYsW57wvPz9f6enpvvPGjRsrMTFR+fn5AY9NBg4AAOAC3HfffedsLywslMVi0bJly/TZZ5+pQYMGeuihh3zl1EOHDik+Pt7vnpiYGB08eDDgsQngAACAkS7VbUR2794ti8WipKQkDRo0SJs3b9bUqVMVHR2tHj16qKKiQlar1e8eq9Uqt9sd8BgEcAAAwEgXumq0pvXp00fdunVTgwYNJEnJycnas2eP3nzzTfXo0UM2m+2sYM3tdstutwc8Bu/AAQAABJHFYvEFb99LSkpSSUmJJCkhIUFOp9Ov3+l0nnPBw08hgAMAAEbyWIJ/BMNzzz2nIUOG+LXt2LFDSUlJkiSHw6Hc3Fxf34EDB3TgwAE5HI6AxyCAAwAACKJu3bpp8+bNeumll/Ttt9/qjTfe0LvvvquhQ4dKkgYOHKj33ntPq1ev1o4dOzRx4kR17do14C1EJN6BAwAAhrpUFzFce+21eu6557Rw4UI999xzatKkiRYsWKDU1FRJUmpqqmbMmKGFCxfq6NGj6tSpk2bOnFmtMSzec+0+FwJ1o1qEegoAasjeLs1DPQUANST24/UhGzur+aCgPzNzb/b5L7oEUEIFAAAwDCVUAABgJM8lu5FIzSMDBwAAYBgycAAAwEiX6iKGi4EADgAAGCl8C6iUUAEAAIxDBg4AABgpnEuoZOAAAAAMQwYOAAAYKVjfLjURARwAADAS+8ABAADAGGTgAACAkcI3/0YGDgAAwDhk4AAAgJHCeRsRAjgAAGAkFjEAAADAGGTgAACAkcI3/0YGDgAAwDhk4AAAgJFYxAAAAGAYFjEAAADAGGTgAACAkcI3/0YGDgAAwDhk4AAAgJFYxAAAAGAYbxgXUSmhAgAAGIYMHAAAMBIlVAAAAMOwDxwAAACMQQYOAAAYKXzzb2TgAAAAjEMAh4vCarVq8+aP1aXLDb625s2bau3abB0q3a6vcz/VLbd0CeEMgfBRKyZWV0yZrkZr/qyGr69R3REZUh3rOa+NaJGk+gsWKeb9T9Rg2cuq40gN3kTqWBU9bqIa5axVozfelr3/r/26aye3Uf1nlijm3f9Wgxdfk+22O4I3Ni4LHnmDfpiCAA41zmaz6ZVXF6pNSiu/9rfeekElJaXq0vlOrXrzHb25armaNk0M0SyB8HHFlBmy2CJ1dMIYHc+aIesNv1TUg0PPus4SVVf1s+br9Ld7deSRh+T+4jNd8eRMWeo3CMo86j78W9W+ppWOPj5OJxY/I/v9Q2TtfNOZsRs2Ur1Zc3WqIE9HRg1X+WsvK3rUo6rT4YbzPBXhxFMDhykI4FCjkpOv0v+sf0dJ/9Xcr/2mm27UfyX9QmPGTNI//lGo+fOXatNXWzT4wV//xJMABENEs1+oTpsUHV/wlE7v3aOqrQU6+aeVsnXrfta1th63yVvh0olFT8tTvF/lr72s0/v3q/Y1rc7x5J8WNWiIoic88W8Pj1TkbXfo5POLdHrXTrm//Fyu1W/KflffM92/7CzPkcMqf/kFeYr3y73+/6pi3cfnnCcQjljEgBrVucsN+mz9Rk2bNk/O73b42q/vkKq8vK0qL3f52r7c+LU6drguFNMEwobn8GEdnfSYvGVH/Npr1a171rV1rm0v98YvJM8PeYmjY0f+6II6qjvsEdluPhNUub/epJPPL5T3+PHzzqN2y5ZS7Qid2r7V11a17f8pauADksUi99ebVFW466z7zjVPhK9w/hIDARxq1IsvZJ+z/cor43XgwCG/tkOHnEpscuXFmBYQtrwnT+hU7uYfGiwW2e/qK3felrOujWjcWFX/+EbRjz4m6w2/1OmSgzq5Yqmq/hV01X3oYdVulaxjUx+Xt9Ktug89rCsmT9exJ8afdx61GsXIe/SoVFXla/McOSKLzSZLvXrylByUp+TgD9Os30C2rjer/LVXLvzHA5cRAjiERFSUXe5Kt1+bu7JSNtu5X6QGUDOihj+i2lddo7IxI8/qs9jtst97nyrezdHRKY/L1vVm1c+aryPDH5Dn2DFF3tlXZWNG6vSe3ZKk43Nnq9Hq9xXRIkmW6GjVnzXnzINq15EsFtm6nHm/7eiUx2WxRcp76pTfeN5TZ/6dYKlj9c+rWK2q9+RMeQ4fVsWH7wf9zwDmMumdtWCrVgC3efPm81/0L9dff321J4PwUVFRqUaNovzarDabXD8qqQKoWVHDRsred4CO/3G6Tu/951n93tOndbpwl8pfe1mSVF64U9brrpftlp5yb/xCFqtVDZ5d4nePJSJCEU2byr3pbzoyargkyX53f9WKjdXJl5ZLkjzOUtVq2EiWOnX87/3XSlhvZcUPjZF21Zs2WxFNmqps/GipsjJovx/mo4QaoBkzZmjXrjPvJHi9P/2HZrFY9M033/y8meGyVlx8UK1bX+PXlpAQp4MHS0M0IyC81B31qCJ736Xjc2bLveGzc17jOXxYp4u+9Ws7vb9IteLipYgISVLZ+DHyVvj/xct75LDkdstTvP/M+fFj8kZF+c4lyfNdqSz160u1IiTPaUlSrUaN5K2okPfECUmSJSpK9WbNVURiEx19fJzf/cClxO12q1+/fpo6dao6duwoSSoqKtLUqVOVl5enxMRETZo0SZ07d/bd8+WXX+qPf/yjioqK5HA4NHv2bDVr1izgMau1CjUnJ0e33HKLWrVqpfz8fO3YseOcB8Ebzmfzpr+rffsURUbafG2/vDFdmzb/PYSzAsKD/f4HFXnHXTr+xxlyr/+/P3ld1TfbVDuppV9bRLNfyFNyQJ4D++U9XaVa9erJU7xfnuL98p48qeiRo2Vp2Oi8c6gq3CVVnVbt1m18bbVT2qnqf3dIXq9kseiKJ2cq4spEHf39ozq9d88F/15cvi6FbUQqKys1fvx47dy509fm9XqVkZGh2NhY5eTk6O6779bo0aNVXFwsSSouLlZGRob69eunNWvWqFGjRho1atR/TI79u2oFcFarVU8//bQk6dlnn63OrYCfzz//Svv2HdCy5fPVuvXVmjDht0pLd+jVV94K9dSAy1pEs+aKun+wXG+9rlPb/p8sDRv5DunM/muynillVnzwvmr/V5KiBg1RrcQmiho8VLWuTFTlXz6V1+VSxX9/oOgx41Xn2vaK+EVzXTFxkmolNpHn4AG/McuzX9GJBU/5T6SyUhXrPlL02PGqfU2yrDd2ln3AvXK9u0aSZLvtDtW5NlXHn50rz4kTP8zziitq/g8JCNCuXbv061//Wt9+65+p/tvf/qaioiLNmDFDLVu21MiRI9W+fXvl5ORIklavXq22bdtq6NChuvrqq5WVlaX9+/dr06ZNAY9d7UUMVqtVCxYsqNYgwL/zeDy699cPa+nzc7Xhi7XaXbhHA38zUvv2FYd6asBlzXpjJ1kiaivq/gcVdf+Dfn3OnjcpZtU7Oj4/S5WffiTPoRIdnfR7RY8aK/u99+n0t9/q2NTH5fnOKUk6uWKJ9PAoXTF1hhRRW1Vb83VsykS/bUf+k5PLlyh6zHjVn/uMPCdPqvy1l+X+4nNJkq3zr2SJiFD9mXP87jmV/3cdnfi7n/8HgcuCpxoZq5qwadMmdezYUePGjVP79u197fn5+WrTpo2ion541zstLU15eXm+/vT0dF+f3W5XSkqK8vLyfCXY87mgVagtW7ZUy5Ytz38h8CN1o1r4ne/evVe39bw3NJMBwpTr/7wh1/954yf7nT1v8juv2r5VZaNHnPviykqdXPyMTi5+5sImU1mpE/OzdGJ+1lldxyZPvLBnIqyEegnDfffdd8720tJSxcfH+7XFxMTo4MGDAfUHgi8xAAAABJHL5ZLV6r8tltVqldvtDqg/EOwDBwAAjHSpfnzeZrOprKzMr83tdisyMtLX/+/BmtvtVr169QIegwwcAABAECUkJMjpdPq1OZ1OX9n0p/rj4uICHoMADgAAGMlbA/8Eg8Ph0LZt21RR8cOm1Lm5uXI4HL7+3NxcX5/L5dL27dt9/YEggAMAAEa6FPaBO5cOHTqocePGyszM1M6dO7VixQoVFBRowIABkqT+/ftry5YtWrFihXbu3KnMzEw1bdo04BWoEgEcAABAUEVERGjp0qUqLS1Vv3799P7772vJkiVKTEyUJDVt2lSLFi1STk6OBgwYoLKyMi1ZskQWiyXgMSze6mz7W4P+fYsJAJePvV2ah3oKAGpI7MfrQzb2Pc3vDvozV+99L+jPrAlk4AAAAAzDNiIAAMBIwVp0YCICOAAAYKRgLTowESVUAAAAw5CBAwAARrpE1mGGBBk4AAAAw5CBAwAARrpUv4V6MRDAAQAAI7GIAQAAAMYgAwcAAIwUzvvAkYEDAAAwDBk4AABgJBYxAAAAGIZ94AAAAGAMMnAAAMBIbCMCAAAAY5CBAwAARgrnbUQI4AAAgJHCeRUqJVQAAADDkIEDAABGCudtRAjgAACAkSihAgAAwBhk4AAAgJHCeRUqGTgAAADDkIEDAABG8rCIAQAAwCzhG75RQgUAADAOGTgAAGAkthEBAACAMcjAAQAAI4VzBo4ADgAAGCmcP6VFCRUAAMAwZOAAAICRwrmESgYOAADAMGTgAACAkcL5W6gEcAAAwEgsYgAAAIAxyMABAAAjsYgBAAAA1fLpp5+qVatWfsfYsWMlSdu3b9c999wjh8Oh/v37a+vWrUEdmwwcAAAwUqjfgdu1a5e6deummTNn+tpsNpvKy8s1YsQI3XnnnXrqqaf05ptvauTIkfr0008VFRUVlLHJwAEAACN55A36UR2FhYW65pprFBcX5zvq1aunDz/8UDabTRMnTlTLli01efJk1a1bVx999FHQfjsBHAAAwAUoLCxUixYtzmrPz89XWlqaLBaLJMlisei6665TXl5e0MYmgAMAAEby1sA/AY/t9eqf//ynNmzYoJ49e6p79+6aP3++3G63SktLFR8f73d9TEyMDh48GLTfzjtwAAAA1VRcXCyXyyWr1apnn31W+/bt06xZs1RRUeFr/zGr1Sq32x208QngAACAkTwhXMTQpEkTffXVV6pfv74sFotat24tj8ej3//+9+rQocNZwZrb7VZkZGTQxieAAwAARgr1p7QaNGjgd96yZUtVVlYqLi5OTqfTr8/pdJ5VVv05eAcOAACgmj7//HN17NhRLpfL1/bNN9+oQYMGSktL09///nffNider1dbtmyRw+EI2vgEcAAAwEgerzfoR6BSU1Nls9k0ZcoU7d69W+vXr9fcuXM1fPhw3XbbbTp27Jhmz56tXbt2afbs2XK5XOrVq1fQfjsBHAAAQDVFR0frpZde0uHDh9W/f39NnjxZ9957r4YPH67o6GgtX75cubm56tevn/Lz87VixYqgbeIrSRZvqLcx/pe6US1CPQUANWRvl+ahngKAGhL78fqQjZ0cf33Qn7nj0OagP7MmsIgBAAAYKZSrUEONEioAAIBhyMABAAAjhXobkVAiAwcAAGAYMnAAAMBI4fwOHAEcAAAwEiVUAAAAGIMMHAAAMJLX6wn1FEKGDBwAAIBhyMABAAAjecL4HTgCOAAAYKRL5GugIUEJFQAAwDBk4AAAgJHCuYRKBg4AAMAwZOAAAICRwvkdOAI4AABgpHD+lBYlVAAAAMOQgQMAAEYK52+hEsABAAAjhfM7cJRQAQAADEMGDgAAGIl94AAAAGAMMnAAAMBI4fwOHAEcAAAwEvvAAQAAwBhk4AAAgJHCuYRKBg4AAMAwZOAAAICRwnkbEQI4AABgJEqoAAAAMAYZOAAAYCS2EQEAAIAxyMABAAAjeVnEAAAAYBZKqAAAADAGGTgAAGAkthEBAACAMcjAAQAAI4XzIgYycAAAwEherzfoR3VUVlZq0qRJSk9PV+fOnbVy5coa+qVnIwMHAABwAebOnautW7fq1VdfVXFxsR5//HElJibqtttuq/GxCeAAAICRQrmIoby8XKtXr9YLL7yglJQUpaSkaOfOnXr99dcvSgBHCRUAAKCaduzYoaqqKqWmpvra0tLSlJ+fL4/HU+PjE8ABAAAjeWvgCFRpaakaNmwoq9Xqa4uNjVVlZaXKysp+1u8KxCVTQj1ZvifUUwAAAAapcu8P2dgul8sveJPkO3e73TU+Phk4AACAarLZbGcFat+fR0ZG1vj4BHAAAADVlJCQoCNHjqiqqsrXVlpaqsjISNWrV6/GxyeAAwAAqKbWrVurdu3aysvL87Xl5uaqXbt2qlWr5sMrAjgAAIBqstvt6tOnj6ZNm6aCggKtW7dOK1eu1ODBgy/K+BZvOH8JFgAA4AK5XC5NmzZNn3zyiaKjozVs2DANGTLkooxNAAcAAGAYSqgAAACGIYADAAAwDAEcAACAYQjgcNFUVlZq0qRJSk9PV+fOnbVy5cpQTwlAkLndbvXu3VtfffVVqKcCXNYumU9p4fI3d+5cbd26Va+++qqKi4v1+OOPKzExUbfddluopwYgCCorKzVhwgTt3Lkz1FMBLnsEcLgoysvLtXr1ar3wwgtKSUlRSkqKdu7cqddff50ADrgM7Nq1SxMmTBAbGwAXByVUXBQ7duxQVVWVUlNTfW1paWnKz8+Xx+MJ4cwABMOmTZvUsWNHvfXWW6GeChAWyMDhoigtLVXDhg1ltVp9bbGxsaqsrFRZWZkaNWoUwtkB+Lnuu+++UE8BCCtk4HBRuFwuv+BNku/c7XaHYkoAABiLAA4Xhc1mOytQ+/48MjIyFFMCAMBYBHC4KBISEnTkyBFVVVX52kpLSxUZGal69eqFcGYAAJiHAA4XRevWrVW7dm3l5eX52nJzc9WuXTvVqsX/DAEAqA7+y4mLwm63q0+fPpo2bZoKCgq0bt06rVy5UoMHDw711AAAMA6rUHHRZGZmatq0aXrwwQcVHR2tMWPG6NZbbw31tAAAMI7Fy66LAAAARqGECgAAYBgCOAAAAMMQwAEAABiGAA4AAMAwBHAAAACGIYADAAAwDAEcAACAYQjgAAAADEMABwAAYBgCOAAAAMMQwAEAABiGAA4AAMAw/x+mrJCfeBjclwAAAABJRU5ErkJggg==",
|
261 |
+
"text/plain": [
|
262 |
+
"<Figure size 800x550 with 2 Axes>"
|
263 |
+
]
|
264 |
+
},
|
265 |
+
"metadata": {},
|
266 |
+
"output_type": "display_data"
|
267 |
+
}
|
268 |
+
],
|
269 |
+
"source": [
|
270 |
+
"cf = confusion_matrix(y_test, voting_predcitions)\n",
|
271 |
+
"sn.heatmap(cf, annot=True)"
|
272 |
+
]
|
273 |
+
},
|
274 |
+
{
|
275 |
+
"cell_type": "code",
|
276 |
+
"execution_count": 54,
|
277 |
+
"metadata": {},
|
278 |
+
"outputs": [
|
279 |
+
{
|
280 |
+
"name": "stdout",
|
281 |
+
"output_type": "stream",
|
282 |
+
"text": [
|
283 |
+
" precision recall f1-score support\n",
|
284 |
+
"\n",
|
285 |
+
" 0 0.98 1.00 0.99 423\n",
|
286 |
+
" 1 1.00 0.96 0.98 270\n",
|
287 |
+
"\n",
|
288 |
+
" accuracy 0.99 693\n",
|
289 |
+
" macro avg 0.99 0.98 0.98 693\n",
|
290 |
+
"weighted avg 0.99 0.99 0.99 693\n",
|
291 |
+
"\n"
|
292 |
+
]
|
293 |
+
}
|
294 |
+
],
|
295 |
+
"source": [
|
296 |
+
"cls_report = classification_report(y_test, voting_predcitions)\n",
|
297 |
+
"print(cls_report)"
|
298 |
+
]
|
299 |
+
}
|
300 |
+
],
|
301 |
+
"metadata": {
|
302 |
+
"kernelspec": {
|
303 |
+
"display_name": "Python 3",
|
304 |
+
"language": "python",
|
305 |
+
"name": "python3"
|
306 |
+
},
|
307 |
+
"language_info": {
|
308 |
+
"codemirror_mode": {
|
309 |
+
"name": "ipython",
|
310 |
+
"version": 3
|
311 |
+
},
|
312 |
+
"file_extension": ".py",
|
313 |
+
"mimetype": "text/x-python",
|
314 |
+
"name": "python",
|
315 |
+
"nbconvert_exporter": "python",
|
316 |
+
"pygments_lexer": "ipython3",
|
317 |
+
"version": "3.10.1"
|
318 |
+
},
|
319 |
+
"orig_nbformat": 4
|
320 |
+
},
|
321 |
+
"nbformat": 4,
|
322 |
+
"nbformat_minor": 2
|
323 |
+
}
|