yashika0998 committed on
Commit
5b22695
1 Parent(s): 19e5079
Files changed (1) hide show
  1. app.py +177 -0
app.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """GradioInterface_v2.ipynb
3
+ Automatically generated by Colaboratory.
4
+ """
5
+
6
+ # Commented out IPython magic to ensure Python compatibility.
7
+ # # Capture to suppress the download output
8
+ # %%capture
9
+ # !pip install gradio
10
+ # !pip install pandas
11
+ # !pip install transformers
12
+ # !pip install parsezeeklogs
13
+ # !pip install elasticsearch
14
+
15
+ # Define imports for model use
16
+ import torch
17
+ from transformers import pipeline
18
+ from parsezeeklogs import ParseZeekLogs
19
+ from transformers import BertTokenizer
20
+ import gradio as gr
21
+ import pandas as pd
22
+
23
+ # Define model
24
# Define model
# Fine-tuned BERT classifier for IoT-23 network-log records; the model repo
# does not bundle a tokenizer, so the stock "bert-base-cased" tokenizer is
# supplied explicitly.
pipe = pipeline(model="19kmunz/IoT-23-BERT-Network-Logs-Classification", tokenizer=BertTokenizer.from_pretrained("bert-base-cased"))

# Define string constants
# These serve double duty: as UI labels for the input-type radio / steps
# checkboxes, and as dict keys passed between the predict* helpers below.
LOG = "conn.log Output"
HEADER_TABLE = "Headers Table"
SENTENCES = "Sentences"
OUT = "out"
INPUT_TYPES = [LOG, HEADER_TABLE, SENTENCES]  # choices for the input-type radio
STEPS = [HEADER_TABLE, SENTENCES]  # intermediary steps the user may display
# Zeek conn.log fields consumed by the model (order matters downstream).
HEADERS=['id.resp_p', 'proto', 'conn_state', 'orig_pkts', 'orig_ip_bytes', 'resp_ip_bytes']
34
+
35
# Define sentence-ization functions
# Human-readable phrase for each conn.log column; make_sentence() uses these
# to turn a raw field value into an English clause for the model input.
feature_names = {'id.resp_p':'response port',
                 'proto':'transport protocol',
                 'orig_pkts':'number of packets sent by the origin',
                 'conn_state':'connection state',
                 'orig_ip_bytes':'number of IP level bytes sent by the originator',
                 'resp_ip_bytes':'number of IP level bytes sent by the responder'}
43
+
44
# Turn one table row into per-feature English clauses.
def make_sentence(row):
    """Map every feature in *row* to an English clause.

    The bookkeeping columns 'label' and '#' are copied through unchanged;
    each remaining column becomes "<feature description> is <value>.".
    """
    described = {}
    for key in row.keys():
        value = row[key]
        if key in ('label', '#'):
            described[key] = value
        else:
            described[key] = f"{feature_names[key]} is {value}."
    return described
53
+
54
# Take all sentence observations and merge them into paragraph inputs.
def make_paragraphs(ser):
    """Join each observation's six clauses into one space-separated paragraph.

    Returns a one-column DataFrame named 'Sentences' sharing *ser*'s index.
    """
    clause_order = ('id.resp_p', 'proto', 'conn_state', 'orig_pkts', 'orig_ip_bytes', 'resp_ip_bytes')
    paragraphs = [" ".join(obs[key] for key in clause_order) for _, obs in ser.items()]
    return pd.Series(paragraphs, name="Sentences", index=ser.index).to_frame()
61
+
62
# Define prediction functions for the different input settings.
def predictFromSentences(sentenceTable):
    """Classify each row of the 'Sentences' column with the BERT pipeline.

    Returns {OUT: DataFrame} with one 'Output' value per input sentence:
    'Malicious' for LABEL_0, 'Benign' otherwise.
    """
    predictions = pipe(sentenceTable[SENTENCES].tolist())  # model inference happens here
    verdicts = ["Malicious" if pred['label'] == "LABEL_0" else "Benign" for pred in predictions]
    return {OUT: pd.DataFrame({"Output": verdicts})}
66
+
67
def predictFromHeaderTable(headerTable):
    """Sentence-ize a table of conn.log header values, then classify it.

    Returns both the intermediate paragraphs (under SENTENCES) and the
    model predictions (under OUT).
    """
    per_row_clauses = headerTable.apply(make_sentence, axis=1)
    paragraphs = make_paragraphs(per_row_clauses)
    predictions = predictFromSentences(paragraphs)
    return {
        SENTENCES: paragraphs,
        OUT: predictions[OUT],
    }
74
+
75
def predictFromFileUpload(fileUpload):
    """Parse an uploaded Zeek log (.log/.labeled) or .csv file and classify it.

    Returns a dict with the parsed header table (HEADER_TABLE), the
    sentence-ized paragraphs (SENTENCES), and the predictions (OUT).

    Raises gr.Error if no file was uploaded or its extension is unsupported.
    """
    if fileUpload is None:
        raise gr.Error("No file uploaded")
    fileType = fileUpload.split('.')[-1]
    if fileType == 'csv':
        dataFrame = pd.read_csv(fileUpload, usecols=HEADERS)
    elif fileType in ('log', 'labeled'):
        # Convert the Zeek log to CSV first; ParseZeekLogs yields one CSV
        # record per row (or None for records it skips).
        with open('out.csv', "w") as outfile:
            for log_record in ParseZeekLogs(fileUpload, output_format="csv", safe_headers=False, fields=HEADERS):
                if log_record is not None:
                    outfile.write(log_record + "\n")
        dataFrame = pd.read_csv('out.csv', names=HEADERS)
    else:
        # BUG FIX: previously this branch fell through with `dataFrame`
        # undefined and crashed with a NameError; raise a clear UI error.
        raise gr.Error("Unsupported file type: ." + fileType)
    result = predictFromHeaderTable(dataFrame)
    toReturn = {
        HEADER_TABLE: dataFrame,
        SENTENCES: result[SENTENCES],
        OUT: result[OUT],
    }
    return toReturn
94
+
95
def makeIndexColumn(allInputs):
    """Prepend a '#' column (the row index) to each DataFrame in *allInputs*.

    Mutates the dict in place and returns it; only the SENTENCES,
    HEADER_TABLE and OUT keys are touched, and only when present.
    """
    def _addIndexColumn(key):
        frame = allInputs[key]
        prior_columns = list(frame.columns)  # snapshot before '#' is added
        frame['#'] = frame.index             # materialize the index as a column
        allInputs[key] = frame[['#', *prior_columns]]

    for key in (SENTENCES, HEADER_TABLE, OUT):
        if key in allInputs:
            _addIndexColumn(key)
    return allInputs
109
+
110
def predict(inputType, fileUpload, headerTable, sentenceTable, out):
    """Run the pipeline for the selected input type.

    Returns updated values for the [headerTable, sentenceTable, out]
    components; inputs earlier in the pipeline than *inputType* are
    passed back unchanged.
    """
    if inputType == LOG:
        # Full pipeline: parse the uploaded file, sentence-ize, classify.
        result = makeIndexColumn(predictFromFileUpload(fileUpload))
        return [result[HEADER_TABLE], result[SENTENCES], result[OUT]]
    if inputType == HEADER_TABLE:
        # Start from the hand-entered header table.
        result = makeIndexColumn(predictFromHeaderTable(headerTable))
        return [headerTable, result[SENTENCES], result[OUT]]
    if inputType == SENTENCES:
        # Sentences supplied directly; classify them as-is.
        result = makeIndexColumn(predictFromSentences(sentenceTable))
        return [headerTable, sentenceTable, result[OUT]]
124
+
125
# Update UI
def updateInputOutputBlocks(inputType, steps):
    """Recompute visibility/interactivity of the three input widgets.

    Triggered when the input-type radio or the intermediary-steps
    checkboxes change: only the widget matching *inputType* is editable,
    and intermediary widgets stay visible (read-only) when listed in *steps*.
    """
    # IDIOM FIX: replaced `True if cond else False` / `1 if cond else 0`
    # with the boolean expressions themselves (Gradio documents these
    # parameters as booleans).
    fileUpload = gr.File(
        visible=(inputType == LOG),
        interactive=(inputType == LOG)
    )
    headerTable = gr.Dataframe(
        visible=(inputType == HEADER_TABLE or HEADER_TABLE in steps),
        interactive=(inputType == HEADER_TABLE)
    )
    sentenceTable = gr.Dataframe(
        interactive=(inputType == SENTENCES),
        visible=(inputType == SENTENCES or SENTENCES in steps)
    )
    return fileUpload, headerTable, sentenceTable
141
+
142
# Create Gradio UI
with gr.Blocks() as app:
    # User-facing intro text.
    gr.Markdown("""
    # Network Log Predictions
    Input log information below and click 'Run' to get predictions from our model!
    Access the settings at the bottom for different types of input and to see inbetween steps.
    """)
    # Inputs / Outputs
    # Only one input widget is active at a time; updateInputOutputBlocks()
    # toggles visibility/interactivity based on the Settings section below.
    fileUpload = gr.File(file_types=[".log", ".log.labeled", ".csv"], label="Zeek Log File", visible=False, file_count='single')
    headerTable = gr.Dataframe(row_count = (2, "dynamic"), col_count=(7,"fixed"), headers=['#', *HEADERS], label="Header Inputs", interactive=1)
    sentenceTable = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2, "fixed"), headers=["#", "Sentence"], label="Sentences", interactive=0, visible=False)
    out = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2, "fixed"), headers=['#', "Output"], label="Predictions", column_widths=["60px", "100%"])
    btn = gr.Button("Run")

    # Settings
    with gr.Accordion("Settings", open=False):
        # Radio chooses which widget is the input; checkboxes reveal
        # intermediary transformation steps.
        inputType = gr.Radio(INPUT_TYPES, value="Headers Table", label="Input")
        steps = gr.CheckboxGroup(STEPS, label="Display Intermediary Steps")
        # Re-render the input widgets whenever either setting changes.
        inputType.change(
            fn=updateInputOutputBlocks,
            inputs=[inputType, steps],
            outputs=[fileUpload, headerTable, sentenceTable]
        )
        steps.change(
            fn=updateInputOutputBlocks,
            inputs=[inputType, steps],
            outputs=[fileUpload, headerTable, sentenceTable]
        )
    # Assign Callback
    btn.click(
        fn=predict,
        inputs=[inputType, fileUpload, headerTable, sentenceTable, out],
        outputs=[headerTable, sentenceTable, out]
    )

app.launch()