{ "cells": [ { "cell_type": "code", "execution_count": 7, "id": "e2d0dce9-4ba4-4088-a07b-4ddabe1abf2a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "cuda\n", "{'modified': '2022-10-24T15:09:07.609Z', 'name': 'Scheduled Task/Job', 'description': 'Adversaries may abuse task scheduling functionality to facilitate initial or recurring execution of malicious code. On Android and iOS, APIs and libraries exist to facilitate scheduling tasks to execute at a specified date, time, or interval.\\n\\nOn Android, the `WorkManager` API allows asynchronous tasks to be scheduled with the system. `WorkManager` was introduced to unify task scheduling on Android, using `JobScheduler`, `GcmNetworkManager`, and `AlarmManager` internally. `WorkManager` offers a lot of flexibility for scheduling, including periodically, one time, or constraint-based (e.g. only when the device is charging).(Citation: Android WorkManager)\\n\\nOn iOS, the `NSBackgroundActivityScheduler` API allows asynchronous tasks to be scheduled with the system. The tasks can be scheduled to be repeating or non-repeating, however, the system chooses when the tasks will be executed. The app can choose the interval for repeating tasks, or the delay between scheduling and execution for one-time tasks.(Citation: Apple NSBackgroundActivityScheduler)', 'kill_chain_phases': [{'kill_chain_name': 'mitre-mobile-attack', 'phase_name': 'execution'}, {'kill_chain_name': 'mitre-mobile-attack', 'phase_name': 'persistence'}], 'x_mitre_modified_by_ref': 'identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5', 'x_mitre_detection': 'Scheduling tasks/jobs can be difficult to detect, and therefore enterprises may be better served focusing on detection at other stages of adversarial behavior.', 'x_mitre_platforms': ['Android', 'iOS'], 'x_mitre_domains': ['mobile-attack'], 'x_mitre_version': '1.0', 'x_mitre_contributors': ['Lorin Wu, Trend Micro'], 'x_mitre_tactic_type': ['Post-Adversary Device Access'], 'type': 'attack-pattern', 'id': 'attack-pattern--00290ac5-551e-44aa-bbd8-c4b913488a6d', 'created': '2020-11-04T16:43:31.619Z', 'created_by_ref': 'identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5', 'external_references': [{'source_name': 'mitre-attack', 'url': 'https://attack.mitre.org/techniques/T1603', 'external_id': 'T1603'}, {'source_name': 'Android WorkManager', 'description': 'Google. (n.d.). Schedule tasks with WorkManager. Retrieved November 4, 2020.', 'url': 'https://developer.android.com/topic/libraries/architecture/workmanager'}, {'source_name': 'Apple NSBackgroundActivityScheduler', 'description': 'Apple. (n.d.). NSBackgroundActivityScheduler. 
Retrieved November 4, 2020.', 'url': 'https://developer.apple.com/documentation/foundation/nsbackgroundactivityscheduler'}], 'object_marking_refs': ['marking-definition--fa42a846-8d90-4e51-bc29-71d5b4802168'], 'x_mitre_attack_spec_version': '2.1.0', 'x_mitre_is_subtechnique': False}\n" ] } ], "source": [ "import json\n", "import transformers\n", "import textwrap\n", "from transformers import LlamaTokenizer, LlamaForCausalLM\n", "import os\n", "import sys\n", "from typing import List\n", "\n", "from peft import (\n", " LoraConfig,\n", " get_peft_model,\n", " get_peft_model_state_dict,\n", " prepare_model_for_int8_training,\n", ")\n", "\n", "import fire\n", "import torch\n", "from datasets import load_dataset\n", "import pandas as pd\n", "\n", "import matplotlib.pyplot as plt\n", "import matplotlib as mpl\n", "import seaborn as sns\n", "from pylab import rcParams\n", "\n", "sns.set(rc={'figure.figsize': (10, 7)})\n", "sns.set(rc={'figure.dpi': 100})\n", "sns.set(style='white', palette='muted', font_scale=1.2)\n", "\n", "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "print(DEVICE)\n", "\n", "filename = \"cti-ATT-CK-v13.1/mobile-attack/attack-pattern/attack-pattern--00290ac5-551e-44aa-bbd8-c4b913488a6d.json\"\n", "data = json.load(open(filename))\n", "print(data[\"objects\"][0])\n", "\n", "# Build instruction records from the loaded ATT&CK bundle. The mapping below is an assumption:\n", "# ask about each technique's name and answer with its description.\n", "dataset_data = [\n", " {\n", " \"instruction\": \"What is \" + obj[\"name\"] + \"?\",\n", " \"input\": \"\",\n", " \"output\": obj[\"description\"]\n", " }\n", " for obj in data[\"objects\"]\n", " if obj.get(\"type\") == \"attack-pattern\"\n", "]\n", "\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "e294c7d8-f4e1-4779-b9e6-d7cd95d5b5b9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "cuda\n", " date \n", "0 Fri Mar 23 00:40:40 +0000 2018 \\\n", "1 Fri Mar 23 00:40:40 +0000 2018 \n", "2 Fri Mar 23 00:40:42 +0000 2018 \n", "3 Fri Mar 23 00:41:04 +0000 2018 \n", "4 Fri Mar 23 00:41:07 +0000 2018 \n", "\n", " tweet sentiment \n", "0 @p0nd3ea Bitcoin wasn't built to live on excha... 1.0 \n", "1 @historyinflicks Buddy if I had whatever serie... 1.0 \n", "2 @eatBCH @Bitcoin @signalapp @myWickr @Samsung ... 0.0 \n", "3 @aantonop Even if Bitcoin crash tomorrow morni... 0.0 \n", "4 I am experimenting whether I can live only wit... 
1.0 \n", "{'instruction': 'Detect the sentiment of the tweet.', 'input': \"@p0nd3ea Bitcoin wasn't built to live on exchanges.\", 'output': 'Positive'}\n" ] } ], "source": [ "import json\n", "import transformers\n", "import textwrap\n", "from transformers import LlamaTokenizer, LlamaForCausalLM\n", "import os\n", "import sys\n", "from typing import List\n", "\n", "from peft import (\n", " LoraConfig,\n", " get_peft_model,\n", " get_peft_model_state_dict,\n", " prepare_model_for_int8_training,\n", ")\n", "\n", "import fire\n", "import torch\n", "from datasets import load_dataset\n", "import pandas as pd\n", "\n", "import matplotlib.pyplot as plt\n", "import matplotlib as mpl\n", "import seaborn as sns\n", "from pylab import rcParams\n", "\n", "sns.set(rc={'figure.figsize': (10, 7)})\n", "sns.set(rc={'figure.dpi': 100})\n", "sns.set(style='white', palette='muted', font_scale=1.2)\n", "\n", "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "print(DEVICE)\n", "\n", "df = pd.read_csv(\"bitcoin-sentiment-tweets.csv\")\n", "print(df.head())\n", "\n", "\n", "def sentiment_score_to_name(score: float):\n", " if score > 0:\n", " return \"Positive\"\n", " elif score < 0:\n", " return \"Negative\"\n", " return \"Neutral\"\n", "\n", "\n", "dataset_data = [\n", " {\n", " \"instruction\": \"Detect the sentiment of the tweet.\",\n", " \"input\": row_dict[\"tweet\"],\n", " \"output\": sentiment_score_to_name(row_dict[\"sentiment\"])\n", " }\n", " for row_dict in df.to_dict(orient=\"records\")\n", "]\n", "\n", "print(dataset_data[0])\n", "with open(\"alpaca-bitcoin-sentiment-dataset.json\", \"w\") as f:\n", " json.dump(dataset_data, f)\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "b1831b9f-37b5-4e73-ac4a-1e73226d2477", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "cuda\n" ] } ], "source": [ "import json\n", "import transformers\n", "import textwrap\n", "from transformers import LlamaTokenizer, LlamaForCausalLM\n", "import os\n", "import sys\n", "from typing import List\n", "\n", "from peft import (\n", " LoraConfig,\n", " get_peft_model,\n", " get_peft_model_state_dict,\n", " prepare_model_for_int8_training,\n", ")\n", "\n", "import fire\n", "import torch\n", "from datasets import load_dataset\n", "import pandas as pd\n", "\n", "import matplotlib.pyplot as plt\n", "import matplotlib as mpl\n", "import seaborn as sns\n", "from pylab import rcParams\n", "\n", "sns.set(rc={'figure.figsize': (10, 7)})\n", "sns.set(rc={'figure.dpi': 100})\n", "sns.set(style='white', palette='muted', font_scale=1.2)\n", "\n", "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "print(DEVICE)" ] }, { "cell_type": "code", "execution_count": 2, "id": "d789cc04-db22-4cdc-95e0-7e726659d446", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "cuda\n" ] } ], "source": [ "BASE_MODEL = \"decapoda-research/llama-7b-hf\"\n", "\n", "model = LlamaForCausalLM.from_pretrained(\n", " BASE_MODEL,\n", " load_in_8bit=True,\n", " torch_dtype=torch.float16,\n", " device_map=\"auto\",\n", ")\n", "\n", "tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)\n", "\n", "tokenizer.pad_token_id = (\n", " 0 # unk. 
we want this to be different from the eos token\n", ")\n", "# Left padding: in a batch, every prompt then ends at the right edge, which is what batched generation with a causal LM expects.\n", "tokenizer.padding_side = \"left\"\n", "\n", "# Reload the Alpaca-style sentiment dataset written out above as a Hugging Face dataset.\n", "data = load_dataset(\"json\", data_files=\"alpaca-bitcoin-sentiment-dataset.json\")\n", "print(data[\"train\"])" ] }, { "cell_type": "code", "execution_count": null, "id": "ff9fea29-88ac-482e-96d5-543fc0f99b01", "metadata": {}, "outputs": [], "source": [] }
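, { "cell_type": "code", "execution_count": null, "id": "a3f1c2d4-5e6b-4f70-9c21-0b7d8e9a1b2c", "metadata": {}, "outputs": [], "source": [ "# Hedged sketch of the next step (not run here): turn each Alpaca-style record into a training\n", "# prompt and attach LoRA adapters with peft. The prompt template, CUTOFF_LEN, and the LoRA\n", "# hyperparameters below are illustrative assumptions, not values fixed earlier in this notebook.\n", "CUTOFF_LEN = 256\n", "\n", "\n", "def generate_prompt(example):\n", " # Simplified Alpaca-style prompt: instruction, optional input, then the expected response.\n", " return (\n", " \"### Instruction:\\n\" + example[\"instruction\"] + \"\\n\\n\"\n", " \"### Input:\\n\" + example[\"input\"] + \"\\n\\n\"\n", " \"### Response:\\n\" + example[\"output\"]\n", " )\n", "\n", "\n", "def tokenize(example):\n", " # Tokenize the full prompt; labels mirror input_ids for causal-LM fine-tuning.\n", " result = tokenizer(\n", " generate_prompt(example),\n", " truncation=True,\n", " max_length=CUTOFF_LEN,\n", " padding=False,\n", " )\n", " result[\"labels\"] = result[\"input_ids\"].copy()\n", " return result\n", "\n", "\n", "train_data = data[\"train\"].shuffle().map(tokenize)\n", "\n", "# Wrap the 8-bit base model for training and add LoRA adapters on the attention projections.\n", "model = prepare_model_for_int8_training(model)\n", "lora_config = LoraConfig(\n", " r=8,\n", " lora_alpha=16,\n", " target_modules=[\"q_proj\", \"v_proj\"],\n", " lora_dropout=0.05,\n", " bias=\"none\",\n", " task_type=\"CAUSAL_LM\",\n", ")\n", "model = get_peft_model(model, lora_config)\n", "model.print_trainable_parameters()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.2" } }, "nbformat": 4, "nbformat_minor": 5 }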