"""Gradio demo: train a small dense network on network-flow CSVs and plot its training history.

The UI offers a result type ("Accuracy" or "Loss") and two CSV files
(malicious and benign flows). Each submission retrains the model from scratch
and returns the per-epoch train/validation curve as a matplotlib figure.
"""
import gradio as gr


import pandas as pd
from math import sqrt
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, r2_score, confusion_matrix, mean_absolute_error, mean_squared_error, f1_score, log_loss
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
# load packages for ANN
import tensorflow as tf

# Tunables, kept in one place so a reader can change them without hunting
# through the training code.
LABEL_COLUMN = "isMalware"   # target column read from the combined dataset
MAX_SAMPLE_ROWS = 38000      # upper bound on rows used, to keep training time down
TEST_FRACTION = 0.2
SEED = 42
EPOCHS = 150
BATCH_SIZE = 32


def _load_flows(malicious_traffic, benign_traffic):
    """Read both CSVs, combine them, down-sample, and drop columns containing NaN."""
    malicious_dataset = pd.read_csv(malicious_traffic)
    benign_dataset = pd.read_csv(benign_traffic)

    # keep=False removes *every* copy of a duplicated benign row (not just the
    # extra ones); this matches the original filtering.
    benign_dataset = benign_dataset[~benign_dataset.duplicated(keep=False)]

    all_flows = pd.concat([malicious_dataset, benign_dataset])

    # Cap the sample at the population size: DataFrame.sample raises when asked
    # for more rows than exist. Seeding makes the subset reproducible.
    sample_size = min(MAX_SAMPLE_ROWS, len(all_flows))
    reduced_dataset = all_flows.sample(sample_size, random_state=SEED)

    # dropna works on mixed dtypes, whereas np.isnan(DataFrame) raises on
    # non-numeric columns.
    return reduced_dataset.dropna(axis="columns")


def _plot_history(history, metric):
    """Return a figure of the train and validation curves for ``metric`` ('accuracy' or 'loss')."""
    fig, ax = plt.subplots()
    ax.plot(history[metric])
    ax.plot(history["val_" + metric])
    ax.set_title("model " + metric)
    ax.set_ylabel(metric)
    ax.set_xlabel("epoch")
    ax.legend(["train", "test"], loc="upper left")
    # Detach from pyplot's global registry so repeated requests do not pile up
    # open figures; the Figure object itself remains usable by the caller.
    plt.close(fig)
    return fig


def malware_detection_DL(results, malicious_traffic, benign_traffic):
    """Train a dense binary classifier on flow data and plot its training history.

    Parameters
    ----------
    results : str
        "Accuracy" selects the accuracy curves; any other value selects loss.
    malicious_traffic : str
        Path to the malicious-flows CSV.
    benign_traffic : str
        Path to the benign-flows CSV.

    Returns
    -------
    matplotlib.figure.Figure
        Per-epoch train vs. validation curve for the selected metric.

    Notes
    -----
    Assumes the combined data has an ``isMalware`` label column and that the
    remaining columns are numeric (they are fed to MinMaxScaler) -- TODO confirm
    against the CSV schema.
    """
    df = _load_flows(malicious_traffic, benign_traffic)

    # Isolate dependent and independent variables.
    labels = df[LABEL_COLUMN]
    features = df.drop(columns=[LABEL_COLUMN])

    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=TEST_FRACTION, random_state=SEED
    )

    # Fit the scaler on the training split only, then apply it to the held-out
    # split; fitting on all rows would leak test-set statistics into training.
    min_max_scaler = preprocessing.MinMaxScaler()
    x_train = min_max_scaler.fit_transform(x_train)
    x_test = min_max_scaler.transform(x_test)

    # Input width follows the data: the number of surviving columns depends on
    # which ones contained NaN in the sampled rows.
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(32, activation='relu', input_shape=(x_train.shape[1],)),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # verbose=0 suppresses the per-epoch log; the History object still records
    # loss/accuracy for both the training and validation data.
    history = model.fit(
        x_train, y_train,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        verbose=0,
        validation_data=(x_test, y_test),
    )

    metric = "accuracy" if results == "Accuracy" else "loss"
    # Return the figure itself: plt.show() returns None, which left the Gradio
    # "plot" output with nothing to render.
    return _plot_history(history.history, metric)


iface = gr.Interface(
    malware_detection_DL,
    [
        gr.inputs.Dropdown(["Accuracy", "Loss"], label="Result Type"),
        gr.inputs.Dropdown(["malicious_flows.csv"], label="Malicious traffic in .csv"),
        gr.inputs.Dropdown(["sample_benign_flows.csv"], label="Benign Traffic in .csv"),
    ],
    "plot",
)

iface.launch()