19kmunz committed on
Commit
5e87978
1 Parent(s): 0519586

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +178 -0
app.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """GradioInterface_v2.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+ """
6
+
7
+ # Commented out IPython magic to ensure Python compatibility.
8
+ # # Capture to suppress the download output
9
+ # %%capture
10
+ # !pip install gradio
11
+ # !pip install pandas
12
+ # !pip install transformers
13
+ # !pip install parsezeeklogs
14
+ # !pip install elasticsearch
15
+
16
+ # Define imports for model use
17
+ import torch
18
+ from transformers import pipeline
19
+ from parsezeeklogs import ParseZeekLogs
20
+ from transformers import BertTokenizer
21
+ import gradio as gr
22
+ import pandas as pd
23
+
24
+ # Define model
25
# Build the Hugging Face pipeline once at import time, pairing the hosted
# model with the "bert-base-cased" tokenizer.
_tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
pipe = pipeline(
    model="19kmunz/IoT-23-BERT-Network-Logs-Classification",
    tokenizer=_tokenizer,
)

# String constants shared by the UI widgets and the prediction helpers.
LOG = "conn.log Output"
HEADER_TABLE = "Headers Table"
SENTENCES = "Sentences"
OUT = "out"

# Choices offered for the kind of input the user provides.
INPUT_TYPES = [LOG, HEADER_TABLE, SENTENCES]

# Intermediate tables that can optionally be displayed.
STEPS = [HEADER_TABLE, SENTENCES]

# Log fields read from uploads and shown as header-table columns.
HEADERS = [
    'id.resp_p',
    'proto',
    'conn_state',
    'orig_pkts',
    'orig_ip_bytes',
    'resp_ip_bytes',
]
35
+
36
+ # Define sentence-ization functions
37
+ # Dictionary of feature names to use in the make sentence function
38
# Phrase used for each supported feature when it is written out as a sentence.
feature_names = {
    'id.resp_p': 'response port',
    'proto': 'transport protocol',
    'orig_pkts': 'number of packets sent by the origin',
    'conn_state': 'connection state',
    'orig_ip_bytes': 'number of IP level bytes sent by the originator',
    'resp_ip_bytes': 'number of IP level bytes sent by the responder',
}


def make_sentence(row):
    """Describe every feature of one record as a short sentence.

    Args:
        row: Mapping-like record (anything with ``keys()`` and item access).

    Returns:
        A dict with the same keys as ``row``. The 'label' and '#' entries are
        copied through unchanged; every other entry becomes
        "<feature phrase> is <value>.".

    Raises:
        KeyError: If a key other than 'label' or '#' has no entry in
            ``feature_names``.
    """
    def describe(name):
        # Bookkeeping columns are passed through rather than verbalised.
        if name in ('label', '#'):
            return row[name]
        return f"{feature_names[name]} is {row[name]!s}."

    return {name: describe(name) for name in row.keys()}
54
+
55
+ # Take all sentence observations and make them into paragraph inputs
56
def make_paragraphs(ser):
    """Merge each observation's feature sentences into a single paragraph.

    Args:
        ser: pandas Series whose values are mappings holding one sentence
            string per feature.

    Returns:
        A one-column DataFrame named "Sentences" that keeps the index of
        ``ser``. Each cell is the six feature sentences joined by single
        spaces, in a fixed feature order.
    """
    ordered_features = (
        'id.resp_p',
        'proto',
        'conn_state',
        'orig_pkts',
        'orig_ip_bytes',
        'resp_ip_bytes',
    )
    paragraphs = [
        " ".join(obs[feature] for feature in ordered_features)
        for _, obs in ser.items()
    ]
    return pd.Series(paragraphs, name="Sentences", index=ser.index).to_frame()
62
+
63
+ # Define prediction Functions For Different Settings
64
def predictFromSentences(sentenceTable):
    """Classify every paragraph in the sentence table.

    Args:
        sentenceTable: DataFrame with a SENTENCES column of input texts.

    Returns:
        A dict with a single OUT entry: a DataFrame whose "Output" column is
        "Malicious" where the pipeline label is "LABEL_0" and "Benign" for
        any other label.
    """
    texts = sentenceTable[SENTENCES].tolist()
    predictions = pipe(texts)  # This does the prediction!

    verdicts = []
    for prediction in predictions:
        if prediction['label'] == "LABEL_0":
            verdicts.append("Malicious")
        else:
            verdicts.append("Benign")
    return {OUT: pd.DataFrame({"Output": verdicts})}
67
+
68
def predictFromHeaderTable(headerTable):
    """Turn header rows into paragraphs and classify them.

    Args:
        headerTable: DataFrame with one record per row; each row is passed to
            ``make_sentence``.

    Returns:
        A dict holding the generated paragraphs under SENTENCES and the
        prediction table under OUT.
    """
    per_row_sentences = headerTable.apply(make_sentence, axis=1)
    paragraphs = make_paragraphs(per_row_sentences)
    predictions = predictFromSentences(paragraphs)
    return {SENTENCES: paragraphs, OUT: predictions[OUT]}
75
+
76
def predictFromFileUpload(fileUpload):
    """Run predictions for an uploaded CSV or log file.

    Args:
        fileUpload: Path of the uploaded file as a string, or None when
            nothing has been uploaded.

    Returns:
        A dict with the parsed header table under HEADER_TABLE, the generated
        paragraphs under SENTENCES and the prediction table under OUT.

    Raises:
        gr.Error: If no file was uploaded or the extension is not one of
            "csv", "log" or "labeled".
    """
    import io  # local import: only this function needs an in-memory buffer

    if fileUpload is None:
        raise gr.Error("No file uploaded")

    # Compare extensions case-insensitively so e.g. "CONN.LOG" is accepted.
    fileType = fileUpload.split('.')[-1].lower()
    if fileType == 'csv':
        dataFrame = pd.read_csv(fileUpload, usecols=HEADERS)
    elif fileType in ('log', 'labeled'):
        # Collect the converted records in memory rather than in a fixed
        # "out.csv" on disk, so concurrent requests cannot overwrite each
        # other's data.
        buffer = io.StringIO()
        for log_record in ParseZeekLogs(fileUpload, output_format="csv", safe_headers=False, fields=HEADERS):
            if log_record is not None:
                buffer.write(log_record + "\n")
        buffer.seek(0)
        dataFrame = pd.read_csv(buffer, names=HEADERS)
    else:
        # Previously this fell through and failed with UnboundLocalError.
        raise gr.Error(
            f"Unsupported file type '.{fileType}'; expected .csv, .log or .labeled"
        )

    result = predictFromHeaderTable(dataFrame)
    return {
        HEADER_TABLE: dataFrame,
        SENTENCES: result[SENTENCES],
        OUT: result[OUT],
    }
95
+
96
def makeIndexColumn(allInputs):
    """Add a leading '#' column holding the row index to each known table.

    Args:
        allInputs: Dict that may contain DataFrames under the SENTENCES,
            HEADER_TABLE and OUT keys.

    Returns:
        The same dict. Each table found is replaced by a version whose first
        column is '#', followed by its original columns. The '#' column is
        also added to the original DataFrame object.
    """
    for key in (SENTENCES, HEADER_TABLE, OUT):
        if key not in allInputs:
            continue
        frame = allInputs[key]
        # Capture the column order before '#' is appended at the end.
        ordered_columns = ['#', *frame.columns]
        frame['#'] = frame.index
        allInputs[key] = frame[ordered_columns]
    return allInputs
110
+
111
def predict(inputType, fileUpload, headerTable, sentenceTable, out):
    """Dispatch a prediction request according to the selected input type.

    Args:
        inputType: One of LOG, HEADER_TABLE or SENTENCES.
        fileUpload: Uploaded file, used when ``inputType`` is LOG.
        headerTable: Header table, used when ``inputType`` is HEADER_TABLE.
        sentenceTable: Sentence table, used when ``inputType`` is SENTENCES.
        out: Current predictions table; accepted but not read.

    Returns:
        A list ``[header table, sentence table, predictions]``. Tables that
        are not recomputed for the chosen input type are returned as
        received. Returns None when ``inputType`` matches no known type.
    """
    if inputType == LOG:
        # Process File Upload
        result = makeIndexColumn(predictFromFileUpload(fileUpload))
        return [result[HEADER_TABLE], result[SENTENCES], result[OUT]]

    if inputType == HEADER_TABLE:
        # Process Header Table
        result = makeIndexColumn(predictFromHeaderTable(headerTable))
        return [headerTable, result[SENTENCES], result[OUT]]

    if inputType == SENTENCES:
        # Process From Sentences
        result = makeIndexColumn(predictFromSentences(sentenceTable))
        return [headerTable, sentenceTable, result[OUT]]

    return None
125
+
126
+ # Update UI
127
def updateInputOutputBlocks(inputType, steps):
    """Recompute visibility and interactivity of the input widgets.

    Args:
        inputType: The selected input type (LOG, HEADER_TABLE or SENTENCES).
        steps: Collection of intermediate steps the user chose to display.

    Returns:
        A tuple ``(file upload, header table, sentence table)`` of Gradio
        components. Each is interactive only when it is the selected input.
        The file upload is visible only when selected; the two tables are
        visible when selected or listed in ``steps``.
    """
    from_log = inputType == LOG
    from_headers = inputType == HEADER_TABLE
    from_sentences = inputType == SENTENCES

    upload_widget = gr.File(
        visible=from_log,
        interactive=int(from_log),
    )
    header_widget = gr.Dataframe(
        visible=from_headers or HEADER_TABLE in steps,
        interactive=int(from_headers),
    )
    sentence_widget = gr.Dataframe(
        interactive=int(from_sentences),
        visible=from_sentences or SENTENCES in steps,
    )
    return upload_widget, header_widget, sentence_widget
142
+
143
+ # Create Gradio UI
144
# Build the Gradio UI: input widgets, prediction output, settings and callbacks.
with gr.Blocks() as app:
    gr.Markdown("""
    # Network Log Predictions
    Input log information below and click 'Run' to get predictions from our model!
    Access the settings at the bottom for different types of input and to see inbetween steps.
    """)
    # Inputs / Outputs
    fileUpload = gr.File(
        file_types=[".log", ".log.labeled", ".csv"],
        label="Zeek Log File",
        visible=False,
        file_count='single',
    )
    headerTable = gr.Dataframe(
        row_count=(2, "dynamic"),
        col_count=(7, "fixed"),
        headers=['#', *HEADERS],
        label="Header Inputs",
        interactive=True,
    )
    # The text column must be named SENTENCES because predictFromSentences
    # reads that column; the former "Sentence" header raised KeyError when
    # sentences were typed in directly.
    sentenceTable = gr.Dataframe(
        row_count=(2, "dynamic"),
        col_count=(2, "fixed"),
        headers=["#", SENTENCES],
        label="Sentences",
        interactive=False,
        visible=False,
    )
    out = gr.Dataframe(
        row_count=(2, "dynamic"),
        col_count=(2, "fixed"),
        headers=['#', "Output"],
        label="Predictions",
        column_widths=["60px", "100%"],
    )
    btn = gr.Button("Run")

    # Settings
    with gr.Accordion("Settings", open=False):
        # Default to the header table, matching the widgets' initial state.
        inputType = gr.Radio(INPUT_TYPES, value=HEADER_TABLE, label="Input")
        steps = gr.CheckboxGroup(STEPS, label="Display Intermediary Steps")

    # Either setting can change which widgets are shown or editable.
    inputType.change(
        fn=updateInputOutputBlocks,
        inputs=[inputType, steps],
        outputs=[fileUpload, headerTable, sentenceTable],
    )
    steps.change(
        fn=updateInputOutputBlocks,
        inputs=[inputType, steps],
        outputs=[fileUpload, headerTable, sentenceTable],
    )

    # Assign Callback
    btn.click(
        fn=predict,
        inputs=[inputType, fileUpload, headerTable, sentenceTable, out],
        outputs=[headerTable, sentenceTable, out],
    )

app.launch()