diff --git a/Labelling/CICIDS2017_labelling_fixed_CICFlowMeter.ipynb b/Labelling/CICIDS2017_labelling_fixed_CICFlowMeter.ipynb index f20f711..2c92550 100644 --- a/Labelling/CICIDS2017_labelling_fixed_CICFlowMeter.ipynb +++ b/Labelling/CICIDS2017_labelling_fixed_CICFlowMeter.ipynb @@ -3,15 +3,14 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import glob\n", "import os\n", + "from os.path import join\n", "from sys import platform\n", "import datetime\n", "\n", @@ -34,6 +33,7 @@ { "cell_type": "code", "execution_count": 2, + "metadata": {}, "outputs": [], "source": [ "# Basic preprocessing before getting started on labelling.\n", @@ -45,10 +45,7 @@ " for column in df.columns:\n", " if column not in ['Flow ID' , 'Timestamp', 'Src IP', 'Dst IP', 'Label']:\n", " df[column] = pd.to_numeric(df[column], errors='coerce')\n", - "\n", - " df.dropna()\n", - "\n", - " return df.dropna()\n", + " return df\n", "\n", "def read_csvs_from_path_and_reformat(path):\n", " df = pd.read_csv(path, encoding='cp1252')\n", @@ -57,7 +54,9 @@ " print(\"labels after pre-processing:\", df[\"Label\"].value_counts())\n", "\n", " df[\"Attempted Category\"] = -1\n", - "\n", + " \n", + " df[['Flow Bytes/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min']] = df[['Flow Bytes/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min']].fillna(-1)\n", + " \n", " int64_columns = [\"Total TCP Flow Time\"]\n", "\n", " int32_columns = [\"Src Port\", \"Dst Port\", \"Flow Duration\", \"Total Fwd Packet\", \"Total Bwd packets\", \"Total Length of Fwd Packet\", \"Total Length of Bwd Packet\", \"Fwd Packet Length Max\",\n", @@ -93,6 +92,13 @@ "def label_flows(df, label, attack_start_time_nanoseconds, attack_end_time_nanoseconds, src_ip_list=None,\n", " dst_ip_list= None, src_port_list=None, dst_port_list=None, additional_filters=[], attempted_category=-1, payload_filter=False):\n", "\n", + " label_flows_forward(df, label, attack_start_time_nanoseconds, attack_end_time_nanoseconds, src_ip_list, dst_ip_list, src_port_list, dst_port_list, additional_filters, attempted_category, payload_filter)\n", + " label_flows_backward(df, label, attack_start_time_nanoseconds, attack_end_time_nanoseconds, src_ip_list, dst_ip_list, src_port_list, dst_port_list, additional_filters, attempted_category, payload_filter)\n", + "\n", + " \n", + "def label_flows_forward(df, label, attack_start_time_nanoseconds, attack_end_time_nanoseconds, src_ip_list=None,\n", + " dst_ip_list= None, src_port_list=None, dst_port_list=None, additional_filters=[], attempted_category=-1, payload_filter=False):\n", + "\n", "\n", " # Create initial mask for whole df with all values set to True. Squeeze is necessary to remove second axis (with value 1)\n", " # The reason is that a df of shape (X,) gets converted to (1,X) if you '&' it with a df of shape (X,1)\n", @@ -123,6 +129,40 @@ " df[\"Label\"].mask(mask, label, inplace=True)\n", " df[\"Attempted Category\"].mask(mask, attempted_category, inplace=True)\n", "\n", + "\n", + "def label_flows_backward(df, label, attack_start_time_nanoseconds, attack_end_time_nanoseconds, src_ip_list=None,\n", + " dst_ip_list= None, src_port_list=None, dst_port_list=None, additional_filters=[], attempted_category=-1, payload_filter=False):\n", + "\n", + "\n", + " # Create initial mask for whole df with all values set to True. Squeeze is necessary to remove second axis (with value 1)\n", + " # The reason is that a df of shape (X,) gets converted to (1,X) if you '&' it with a df of shape (X,1)\n", + " mask = pd.DataFrame(True,index=df.index,columns=[df.columns[0]]).squeeze()\n", + "\n", + " attack_start_datetime = pd.to_datetime(attack_start_time_nanoseconds, unit='ns')\n", + " attack_end_datetime = pd.to_datetime(attack_end_time_nanoseconds, unit='ns')\n", + "\n", + " mask &= (df[\"Timestamp\"] >= attack_start_datetime)\n", + " mask &= (df[\"Timestamp\"] <= attack_end_datetime)\n", + "\n", + " if dst_ip_list is not None:\n", + " mask &= (df[\"Src IP\"].isin(dst_ip_list))\n", + " if src_ip_list is not None:\n", + " mask &= (df[\"Dst IP\"].isin(src_ip_list))\n", + "\n", + " if dst_port_list is not None:\n", + " mask &= (df[\"Src Port\"].isin(dst_port_list))\n", + " if src_port_list is not None:\n", + " mask &= (df[\"Dst Port\"].isin(src_port_list))\n", + "\n", + " if payload_filter:\n", + " mask &= (df[\"Total Length of Fwd Packet\"] == 0)\n", + "\n", + " for filter in additional_filters:\n", + " mask &= filter\n", + "\n", + " df[\"Label\"].mask(mask, label, inplace=True)\n", + " df[\"Attempted Category\"].mask(mask, attempted_category, inplace=True)\n", + "\n", "# This function is called when all labelling of malicious flows is completed. Anything that has not yet received a label\n", "# so far is labelled as Benign.\n", "def label_rest_as_benign_and_write_csv(df, file_to_write):\n", @@ -141,59 +181,55 @@ " df.index.name = 'id'\n", " df.to_csv(file_to_write)\n", " else:\n", - " df.to_csv(file_to_write, index=False)\n" - ], - "metadata": { - "collapsed": false - } + " df.to_csv(file_to_write, index=False)" + ] }, { "cell_type": "code", "execution_count": 3, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "labels after pre-processing: NeedManualLabel 371624\n", + "labels after pre-processing: NeedManualLabel 372343\n", "Name: Label, dtype: int64\n", - "label count after labelling:\r\n", - " BENIGN 371624\n", + "label count after labelling:\n", + " BENIGN 372343\n", "Name: Label, dtype: int64\n", - "Attempted Category count after labelling:\r\n", - " -1 371624\n", + "Attempted Category count after labelling:\n", + " -1 372343\n", "Name: Attempted Category, dtype: int64\n" ] } ], "source": [ - "monday_df = read_csvs_from_path_and_reformat(DATASET_PATH + \"Monday-WorkingHours.pcap_Flow.csv\")\n", + "monday_df = read_csvs_from_path_and_reformat(join(DATASET_PATH, \"Monday-WorkingHours.pcap_Flow.csv\"))\n", "\n", "label_rest_as_benign_and_write_csv(monday_df, OUTPUT_PATH + \"monday.csv\")" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 4, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "labels after pre-processing: NeedManualLabel 322078\n", + "labels after pre-processing: NeedManualLabel 322540\n", "Name: Label, dtype: int64\n", - "label count after labelling:\r\n", - " BENIGN 315106\n", - "FTP-Patator 3972\n", + "label count after labelling:\n", + " BENIGN 315549\n", + "FTP-Patator 3991\n", "SSH-Patator 2961\n", "SSH-Patator - Attempted 27\n", "FTP-Patator - Attempted 12\n", "Name: Label, dtype: int64\n", - "Attempted Category count after labelling:\r\n", - " -1 322039\n", + "Attempted Category count after labelling:\n", + " -1 322501\n", " 3 27\n", " 0 10\n", " 2 2\n", @@ -206,7 +242,7 @@ "# TUESDAY 04-07-2017 |\n", "#--------------------+\n", "\n", - "tuesday_df = read_csvs_from_path_and_reformat(DATASET_PATH + \"Tuesday-WorkingHours.pcap_Flow.csv\")\n", + "tuesday_df = read_csvs_from_path_and_reformat(join(DATASET_PATH, \"Tuesday-WorkingHours.pcap_Flow.csv\"))\n", "\n", "# FTP-PATATOR\n", "# -----------\n", @@ -240,38 +276,36 @@ "label_rest_as_benign_and_write_csv(tuesday_df, OUTPUT_PATH + \"tuesday.csv\")\n", "\n", "tuesday_df = None" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 5, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "labels after pre-processing: NeedManualLabel 496641\n", + "labels after pre-processing: NeedManualLabel 497044\n", "Name: Label, dtype: int64\n", - "label count after labelling:\r\n", - " BENIGN 319120\n", - "DoS Hulk 158468\n", + "label count after labelling:\n", + " BENIGN 319271\n", + "DoS Hulk 158545\n", "DoS GoldenEye 7567\n", - "DoS Slowloris 3859\n", - "DoS Slowhttptest - Attempted 3368\n", - "DoS Slowloris - Attempted 1847\n", - "DoS Slowhttptest 1740\n", - "DoS Hulk - Attempted 581\n", - "DoS GoldenEye - Attempted 80\n", + "DoS Slowloris - Attempted 4917\n", + "DoS Slowhttptest - Attempted 3370\n", + "DoS Slowhttptest 1741\n", + "DoS Slowloris 814\n", + "DoS Hulk - Attempted 595\n", + "DoS GoldenEye - Attempted 213\n", "Heartbleed 11\n", "Name: Label, dtype: int64\n", - "Attempted Category count after labelling:\r\n", - " -1 490765\n", - " 0 2927\n", - " 6 2804\n", - " 5 138\n", + "Attempted Category count after labelling:\n", + " -1 487949\n", + " 6 7484\n", + " 0 1446\n", + " 5 158\n", " 4 4\n", " 2 3\n", "Name: Attempted Category, dtype: int64\n" @@ -283,7 +317,7 @@ "# WEDNESDAY 05-07-2017 |\n", "#----------------------+\n", "\n", - "wednesday_df = read_csvs_from_path_and_reformat(DATASET_PATH + \"Wednesday-WorkingHours.pcap_Flow.csv\")\n", + "wednesday_df = read_csvs_from_path_and_reformat(join(DATASET_PATH, \"Wednesday-WorkingHours.pcap_Flow.csv\"))\n", "\n", "# DoS Slowloris\n", "# -------------\n", @@ -403,25 +437,23 @@ "label_rest_as_benign_and_write_csv(wednesday_df, OUTPUT_PATH + \"wednesday.csv\")\n", "\n", "wednesday_df = None" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 6, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "labels after pre-processing: NeedManualLabel 362076\n", + "labels after pre-processing: NeedManualLabel 355560\n", "Name: Label, dtype: int64\n", - "label count after labelling:\r\n", - " BENIGN 288172\n", - "Infiltration - Portscan 71767\n", - "Web Attack - Brute Force - Attempted 1292\n", + "label count after labelling:\n", + " BENIGN 286745\n", + "Infiltration - Portscan 66676\n", + "Web Attack - Brute Force - Attempted 1294\n", "Web Attack - XSS - Attempted 655\n", "Web Attack - Brute Force 73\n", "Infiltration - Attempted 45\n", @@ -430,9 +462,9 @@ "Web Attack - SQL Injection 13\n", "Web Attack - SQL Injection - Attempted 5\n", "Name: Label, dtype: int64\n", - "Attempted Category count after labelling:\r\n", - " -1 360079\n", - " 0 1908\n", + "Attempted Category count after labelling:\n", + " -1 353561\n", + " 0 1910\n", " 4 71\n", " 2 18\n", "Name: Attempted Category, dtype: int64\n" @@ -444,7 +476,7 @@ "# THURSDAY 06-07-2017 |\n", "#---------------------+\n", "\n", - "thursday_df = read_csvs_from_path_and_reformat(DATASET_PATH + \"Thursday-WorkingHours.pcap_Flow.csv\")\n", + "thursday_df = read_csvs_from_path_and_reformat(join(DATASET_PATH, \"Thursday-WorkingHours.pcap_Flow.csv\"))\n", "\n", "# Web Attack - Brute Force\n", "# ------------------------\n", @@ -559,30 +591,28 @@ "label_rest_as_benign_and_write_csv(thursday_df, OUTPUT_PATH + \"thursday.csv\")\n", "\n", "thursday_df = None" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 7, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "labels after pre-processing: NeedManualLabel 547557\n", + "labels after pre-processing: NeedManualLabel 548605\n", "Name: Label, dtype: int64\n", - "label count after labelling:\r\n", - " BENIGN 288544\n", - "Portscan 159066\n", - "DDoS 95144\n", + "label count after labelling:\n", + " BENIGN 287636\n", + "Portscan 160483\n", + "DDoS 95683\n", "Botnet - Attempted 4067\n", "Botnet 736\n", "Name: Label, dtype: int64\n", - "Attempted Category count after labelling:\r\n", - " -1 543490\n", + "Attempted Category count after labelling:\n", + " -1 544538\n", " 1 4067\n", "Name: Attempted Category, dtype: int64\n" ] @@ -593,7 +623,7 @@ "# FRIDAY 07-07-2017 |\n", "#---------------------+\n", "\n", - "friday_df = read_csvs_from_path_and_reformat(DATASET_PATH + \"Friday-WorkingHours.pcap_Flow.csv\")\n", + "friday_df = read_csvs_from_path_and_reformat(join(DATASET_PATH, \"Friday-WorkingHours.pcap_Flow.csv\"))\n", "\n", "# Portscan\n", "# --------\n", @@ -630,19 +660,7 @@ "label_rest_as_benign_and_write_csv(friday_df, OUTPUT_PATH + \"friday.csv\")\n", "\n", "friday_df = None" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 16, - "outputs": [], - "source": [], - "metadata": { - "collapsed": false - } + ] } ], "metadata": { @@ -654,16 +672,16 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.9.13" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 } diff --git a/Labelling/CICIDS2018_labelling_fixed_CICFlowMeter.ipynb b/Labelling/CICIDS2018_labelling_fixed_CICFlowMeter.ipynb index 747e916..ad1b93e 100644 --- a/Labelling/CICIDS2018_labelling_fixed_CICFlowMeter.ipynb +++ b/Labelling/CICIDS2018_labelling_fixed_CICFlowMeter.ipynb @@ -3,12 +3,14 @@ { "cell_type": "code", "execution_count": 1, + "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import glob\n", "import os\n", + "from os.path import join\n", "from sys import platform\n", "\n", "# THIS LABELLING SCRIPT IS USED TO LABEL THE CORRECTED VERSION OF CSE-CIC-IDS-2018.\n", @@ -27,14 +29,12 @@ "\n", "# If set to true, a column is added at the front of the CSV with line numbers\n", "print_index = True" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 2, + "metadata": {}, "outputs": [], "source": [ "# Basic preprocessing before getting started on labelling.\n", @@ -46,7 +46,7 @@ " for column in df.columns:\n", " if column not in ['Flow ID' , 'Timestamp', 'Src IP', 'Dst IP', 'Label']:\n", " df[column] = pd.to_numeric(df[column], errors='coerce')\n", - " return df.dropna()\n", + " return df\n", "\n", "# Reads all csvs of one day and concatenates them into one dataframe\n", "def read_csvs_from_path_and_reformat(path):\n", @@ -63,6 +63,8 @@ "\n", " df[\"Attempted Category\"] = -1\n", "\n", + " df[['Flow Bytes/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min']] = df[['Flow Bytes/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min']].fillna(-1)\n", + "\n", " int64_columns = [\"Total TCP Flow Time\"]\n", "\n", " int32_columns = [\"Src Port\", \"Dst Port\", \"Flow Duration\", \"Total Fwd Packet\", \"Total Bwd packets\", \"Total Length of Fwd Packet\", \"Total Length of Bwd Packet\", \"Fwd Packet Length Max\",\n", @@ -97,34 +99,78 @@ "# payload_filter = When set to true, this will automatically add a constraint [\"Total Length of Fwd Packet\"] == 0. Note that\n", "# the Attempted label and category still need to be specified manually\n", "def label_flows(df, label, attack_start_time_nanoseconds, attack_end_time_nanoseconds, src_ip_list=None,\n", - " dst_ip_list=None, dst_port_list=None, attempted_category=-1, additional_filters=[], payload_filter = False):\n", + " dst_ip_list= None, src_port_list=None, dst_port_list=None, additional_filters=[], attempted_category=-1, payload_filter=False):\n", + "\n", + " label_flows_forward(df, label, attack_start_time_nanoseconds, attack_end_time_nanoseconds, src_ip_list, dst_ip_list, src_port_list, dst_port_list, additional_filters, attempted_category, payload_filter)\n", + " label_flows_backward(df, label, attack_start_time_nanoseconds, attack_end_time_nanoseconds, src_ip_list, dst_ip_list, src_port_list, dst_port_list, additional_filters, attempted_category, payload_filter)\n", + "\n", + " \n", + "def label_flows_forward(df, label, attack_start_time_nanoseconds, attack_end_time_nanoseconds, src_ip_list=None,\n", + " dst_ip_list= None, src_port_list=None, dst_port_list=None, additional_filters=[], attempted_category=-1, payload_filter=False):\n", "\n", - " # Create initial mask with all values set to True. Squeeze is necessary to remove second axis (of size 1)\n", - " # The reason is that a df of shape (X,), if you '&' it with a df of shape (X,1), gets converted to (1,X)\n", - " custom_mask = pd.DataFrame(True, index=df.index, columns=[df.columns[0]]).squeeze()\n", + "\n", + " # Create initial mask for whole df with all values set to True. Squeeze is necessary to remove second axis (with value 1)\n", + " # The reason is that a df of shape (X,) gets converted to (1,X) if you '&' it with a df of shape (X,1)\n", + " mask = pd.DataFrame(True,index=df.index,columns=[df.columns[0]]).squeeze()\n", "\n", " attack_start_datetime = pd.to_datetime(attack_start_time_nanoseconds, unit='ns')\n", " attack_end_datetime = pd.to_datetime(attack_end_time_nanoseconds, unit='ns')\n", "\n", - " custom_mask &= (df[\"Timestamp\"] >= attack_start_datetime)\n", - " custom_mask &= (df[\"Timestamp\"] <= attack_end_datetime)\n", + " mask &= (df[\"Timestamp\"] >= attack_start_datetime)\n", + " mask &= (df[\"Timestamp\"] <= attack_end_datetime)\n", "\n", " if src_ip_list is not None:\n", - " custom_mask &= (df[\"Src IP\"].isin(src_ip_list))\n", + " mask &= (df[\"Src IP\"].isin(src_ip_list))\n", " if dst_ip_list is not None:\n", - " custom_mask &= (df[\"Dst IP\"].isin(dst_ip_list))\n", + " mask &= (df[\"Dst IP\"].isin(dst_ip_list))\n", "\n", + " if src_port_list is not None:\n", + " mask &= (df[\"Src Port\"].isin(src_port_list))\n", " if dst_port_list is not None:\n", - " custom_mask &= (df[\"Dst Port\"].isin(dst_port_list))\n", + " mask &= (df[\"Dst Port\"].isin(dst_port_list))\n", "\n", " if payload_filter:\n", - " custom_mask &= (df[\"Total Length of Fwd Packet\"] == 0)\n", + " mask &= (df[\"Total Length of Fwd Packet\"] == 0)\n", "\n", " for filter in additional_filters:\n", - " custom_mask &= filter\n", + " mask &= filter\n", + "\n", + " df[\"Label\"].mask(mask, label, inplace=True)\n", + " df[\"Attempted Category\"].mask(mask, attempted_category, inplace=True)\n", + "\n", + "\n", + "def label_flows_backward(df, label, attack_start_time_nanoseconds, attack_end_time_nanoseconds, src_ip_list=None,\n", + " dst_ip_list= None, src_port_list=None, dst_port_list=None, additional_filters=[], attempted_category=-1, payload_filter=False):\n", "\n", - " df[\"Label\"].mask(custom_mask, label, inplace=True)\n", - " df[\"Attempted Category\"].mask(custom_mask, attempted_category, inplace=True)\n", + "\n", + " # Create initial mask for whole df with all values set to True. Squeeze is necessary to remove second axis (with value 1)\n", + " # The reason is that a df of shape (X,) gets converted to (1,X) if you '&' it with a df of shape (X,1)\n", + " mask = pd.DataFrame(True,index=df.index,columns=[df.columns[0]]).squeeze()\n", + "\n", + " attack_start_datetime = pd.to_datetime(attack_start_time_nanoseconds, unit='ns')\n", + " attack_end_datetime = pd.to_datetime(attack_end_time_nanoseconds, unit='ns')\n", + "\n", + " mask &= (df[\"Timestamp\"] >= attack_start_datetime)\n", + " mask &= (df[\"Timestamp\"] <= attack_end_datetime)\n", + "\n", + " if dst_ip_list is not None:\n", + " mask &= (df[\"Src IP\"].isin(dst_ip_list))\n", + " if src_ip_list is not None:\n", + " mask &= (df[\"Dst IP\"].isin(src_ip_list))\n", + "\n", + " if dst_port_list is not None:\n", + " mask &= (df[\"Src Port\"].isin(dst_port_list))\n", + " if src_port_list is not None:\n", + " mask &= (df[\"Dst Port\"].isin(src_port_list))\n", + "\n", + " if payload_filter:\n", + " mask &= (df[\"Total Length of Fwd Packet\"] == 0)\n", + "\n", + " for filter in additional_filters:\n", + " mask &= filter\n", + "\n", + " df[\"Label\"].mask(mask, label, inplace=True)\n", + " df[\"Attempted Category\"].mask(mask, attempted_category, inplace=True)\n", "\n", "# This function is called when all labelling of malicious flows is completed. Anything that has not yet received a label\n", "# so far is labelled as Benign.\n", @@ -137,6 +183,7 @@ " print(\"label count after labelling:\\r\\n\", df[\"Label\"].value_counts())\n", " print(\"Attempted Category count after labelling:\\r\\n\", df[\"Attempted Category\"].value_counts())\n", "\n", + " # Adds line numbers in the first column if print_index is set to true\n", " if print_index:\n", " df.reset_index(inplace=True, drop=True)\n", " df.index += 1\n", @@ -144,14 +191,12 @@ " df.to_csv(file_to_write)\n", " else:\n", " df.to_csv(file_to_write, index=False)" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 3, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -201,14 +246,12 @@ " 1518636750*(10**9), [\"13.58.98.64\"], [\"172.31.69.25\"], [22], attempted_category=0, payload_filter=True)\n", "\n", "label_rest_as_benign_and_write_csv(wednesday_14022018_df, DATASET_PATH + dir_name + \".csv\")" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 9, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -274,14 +317,12 @@ " [\"172.31.69.25\"], attempted_category=0, payload_filter=True)\n", "\n", "label_rest_as_benign_and_write_csv(thursday_15022018_df, DATASET_PATH + dir_name + \".csv\")" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 5, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -328,14 +369,12 @@ "# Instead we only find failed FTP-Patator traffic, which is exactly what is covered earlier in this cell\n", "\n", "label_rest_as_benign_and_write_csv(friday_16022018_df, DATASET_PATH + dir_name + \".csv\")" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 6, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -400,14 +439,12 @@ " attempted_category=6, additional_filters=[(tuesday_20022018_df[\"Protocol\"] == 1)])\n", "\n", "label_rest_as_benign_and_write_csv(tuesday_20022018_df, DATASET_PATH + dir_name + \".csv\")" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 7, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -473,14 +510,12 @@ " [\"172.31.69.28\"], attempted_category=0, payload_filter=True, additional_filters=[wednesday_21022018_df[\"Protocol\"] == 6])\n", "\n", "label_rest_as_benign_and_write_csv(wednesday_21022018_df, DATASET_PATH + dir_name + \".csv\")" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 8, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -569,14 +604,12 @@ " [\"18.218.115.60\"], [\"172.31.69.28\"], attempted_category=0, payload_filter=True)\n", "\n", "label_rest_as_benign_and_write_csv(thursday_22022018_df, DATASET_PATH + dir_name + \".csv\")\n" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 9, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -653,14 +686,12 @@ " [\"18.218.115.60\"], [\"172.31.69.28\"], attempted_category=0, payload_filter=True)\n", "\n", "label_rest_as_benign_and_write_csv(friday_23022018_df, DATASET_PATH + dir_name + \".csv\")" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 10, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -746,14 +777,12 @@ " [~(wednesday_28022018_df[\"Src Port\"] == 68)])\n", "\n", "label_rest_as_benign_and_write_csv(wednesday_28022018_df, DATASET_PATH + dir_name + \".csv\")" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 11, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -839,14 +868,12 @@ " [thursday_01032018_df[\"Src Port\"] != 68])\n", "\n", "label_rest_as_benign_and_write_csv(thursday_01032018_df, DATASET_PATH + dir_name + \".csv\")" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 3, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -895,19 +922,14 @@ "\n", "label_rest_as_benign_and_write_csv(friday_02032018_df, DATASET_PATH + dir_name + \".csv\")\n", "\n" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 12, + "metadata": {}, "outputs": [], - "source": [], - "metadata": { - "collapsed": false - } + "source": [] } ], "metadata": { @@ -919,16 +941,16 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.9.13" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 }