Spaces:

19kmunz
/

IoT-23-BERT-Network-Logs-Classification

Sleeping

App Files Files Community

19kmunz committed on Nov 27, 2023

Commit

5e87978

•

1 Parent(s): 0519586

Create app.py

Browse files

Files changed (1) hide show

app.py +178 -0

app.py ADDED Viewed

	@@ -0,0 +1,178 @@

+# -*- coding: utf-8 -*-
+"""GradioInterface_v2.ipynb
+Automatically generated by Colaboratory.
+"""
+# Commented out IPython magic to ensure Python compatibility.
+# # Capture to suppress the download output
+# %%capture
+# !pip install gradio
+# !pip install pandas
+# !pip install transformers
+# !pip install parsezeeklogs
+# !pip install elasticsearch
+# Define imports for model use
+import torch
+from transformers import pipeline
+from parsezeeklogs import ParseZeekLogs
+from transformers import BertTokenizer
+import gradio as gr
+import pandas as pd
+# Define model
+pipe = pipeline(model="19kmunz/IoT-23-BERT-Network-Logs-Classification", tokenizer=BertTokenizer.from_pretrained("bert-base-cased"))
+# Define string constants
+LOG = "conn.log Output"
+HEADER_TABLE = "Headers Table"
+SENTENCES = "Sentences"
+OUT = "out"
+INPUT_TYPES = [LOG, HEADER_TABLE, SENTENCES]
+STEPS = [HEADER_TABLE, SENTENCES]
+HEADERS=['id.resp_p', 'proto', 'conn_state', 'orig_pkts', 'orig_ip_bytes', 'resp_ip_bytes']
+# Define sentence-ization functions
+# Dictionary of feature names to use in the make sentence function
+feature_names = {'id.resp_p':'response port',
+                 'proto':'transport protocol',
+                 'orig_pkts':'number of packets sent by the origin',
+                 'conn_state':'connection state',
+                 'orig_ip_bytes':'number of IP level bytes sent by the originator',
+                 'resp_ip_bytes':'number of IP level bytes sent by the responder'}
+# Function to make sentences out of the data
+def make_sentence(row):
+  sentences = {}
+  for feature in row.keys():
+    if feature == 'label' or feature == "#":
+      sentences[feature] = row[feature]
+    else:
+      sentences[feature] = feature_names[feature] + " is " + str(row[feature]) + "."
+  return sentences
+# Take all sentence observations and make them into paragraph inputs
+def make_paragraphs(ser):
+  paragraphs_list = []
+  for index,obs in ser.items():
+    new_para = obs['id.resp_p'] + " " + obs['proto'] + " " + obs['conn_state'] + " " + obs['orig_pkts'] + " " + obs['orig_ip_bytes'] + " " + obs['resp_ip_bytes']
+    paragraphs_list.append(new_para)
+  return pd.Series(paragraphs_list, name="Sentences", index=ser.index).to_frame()
+# Define prediction Functions For Different Settings
+def predictFromSentences(sentenceTable):
+  output = pipe(sentenceTable[SENTENCES].tolist()) # This does the prediction!
+  return { OUT: pd.DataFrame({"Output": ["Malicious" if pred['label'] == "LABEL_0" else "Benign" for pred in output] }) }
+def predictFromHeaderTable(headerTable):
+  sentences = headerTable.apply(make_sentence, axis=1);
+  paragraphs = make_paragraphs(sentences)
+  return {
+      SENTENCES: paragraphs,
+      OUT: predictFromSentences(paragraphs)[OUT]
+  }
+def predictFromFileUpload(fileUpload):
+  if(fileUpload is None):
+      raise gr.Error("No file uploaded")
+  fileType = fileUpload.split('.')[-1]
+  if(fileType == 'csv'):
+    dataFrame = pd.read_csv(fileUpload, usecols=HEADERS)
+  elif(fileType == 'log' or fileType == 'labeled'):
+    with open('out.csv',"w") as outfile:
+      for log_record in ParseZeekLogs(fileUpload, output_format="csv", safe_headers=False, fields=HEADERS):
+          if log_record is not None:
+              outfile.write(log_record + "\n")
+    dataFrame = pd.read_csv('out.csv', names=HEADERS)
+  result = predictFromHeaderTable(dataFrame)
+  toReturn =  {
+      HEADER_TABLE: dataFrame,
+      SENTENCES: result[SENTENCES],
+      OUT: result[OUT]
+  }
+  return toReturn
+def makeIndexColumn(allInputs):
+  def _makeIndexColumnFor(column):
+    theseHeaders = allInputs[column].columns
+    newHeaders = ['#', *theseHeaders]
+    allInputs[column]['#'] = allInputs[column].index
+    allInputs[column] = allInputs[column][newHeaders]
+  if(SENTENCES in allInputs):
+    _makeIndexColumnFor(SENTENCES)
+  if(HEADER_TABLE in allInputs):
+    _makeIndexColumnFor(HEADER_TABLE)
+  if(OUT in allInputs):
+    _makeIndexColumnFor(OUT)
+  return allInputs
+def predict(inputType, fileUpload, headerTable, sentenceTable, out):
+  output = {};
+  if(inputType == LOG):
+    # Process File Upload
+    output = makeIndexColumn(predictFromFileUpload(fileUpload))
+    return [output[HEADER_TABLE], output[SENTENCES], output[OUT]]
+  elif(inputType == HEADER_TABLE):
+    # Process Header Table
+    output = makeIndexColumn(predictFromHeaderTable(headerTable))
+    return [headerTable, output[SENTENCES], output[OUT]]
+  elif(inputType == SENTENCES):
+    # Process From Sentences
+    output = makeIndexColumn(predictFromSentences(sentenceTable))
+    return [headerTable, sentenceTable, output[OUT]]
+# Update UI
+def updateInputOutputBlocks(inputType, steps):
+  # Update visibility and Interactivity of Gradio Blocks based on Settings
+  fileUpload = gr.File(
+      visible=(True if inputType == LOG else False),
+      interactive=(1 if inputType == LOG else 0)
+  )
+  headerTable = gr.Dataframe(
+      visible=(True if (inputType == HEADER_TABLE or HEADER_TABLE in steps) else False),
+      interactive=(1 if inputType == HEADER_TABLE else 0)
+    )
+  sentenceTable = gr.Dataframe(
+      interactive=(1 if inputType == SENTENCES else 0),
+      visible=(True if (inputType == SENTENCES or SENTENCES in steps) else False)
+    )
+  return fileUpload, headerTable, sentenceTable
+# Create Gradio UI
+with gr.Blocks() as app:
+    gr.Markdown("""
+    # Network Log Predictions
+    Input log information below and click 'Run' to get predictions from our model!
+    Access the settings at the bottom for different types of input and to see inbetween steps.
+    """)
+    # Inputs / Outputs
+    fileUpload = gr.File(file_types=[".log", ".log.labeled", ".csv"], label="Zeek Log File", visible=False, file_count='single')
+    headerTable = gr.Dataframe(row_count = (2, "dynamic"), col_count=(7,"fixed"), headers=['#', *HEADERS], label="Header Inputs", interactive=1)
+    sentenceTable = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2, "fixed"), headers=["#", "Sentence"], label="Sentences", interactive=0, visible=False)
+    out = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2, "fixed"), headers=['#', "Output"], label="Predictions", column_widths=["60px", "100%"])
+    btn = gr.Button("Run")
+    # Settings
+    with gr.Accordion("Settings", open=False):
+      inputType = gr.Radio(INPUT_TYPES, value="Headers Table", label="Input")
+      steps = gr.CheckboxGroup(STEPS, label="Display Intermediary Steps")
+      inputType.change(
+        fn=updateInputOutputBlocks,
+        inputs=[inputType, steps],
+        outputs=[fileUpload, headerTable, sentenceTable]
+      )
+      steps.change(
+        fn=updateInputOutputBlocks,
+        inputs=[inputType, steps],
+        outputs=[fileUpload, headerTable, sentenceTable]
+      )
+    # Assign Callback
+    btn.click(
+        fn=predict,
+        inputs=[inputType, fileUpload, headerTable, sentenceTable, out],
+        outputs=[headerTable, sentenceTable, out]
+        )
+app.launch()