File size: 10,225 Bytes

5fe70fd

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e2d0dce9-4ba4-4088-a07b-4ddabe1abf2a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cuda\n",
      "{'modified': '2022-10-24T15:09:07.609Z', 'name': 'Scheduled Task/Job', 'description': 'Adversaries may abuse task scheduling functionality to facilitate initial or recurring execution of malicious code. On Android and iOS, APIs and libraries exist to facilitate scheduling tasks to execute at a specified date, time, or interval.\\n\\nOn Android, the `WorkManager` API allows asynchronous tasks to be scheduled with the system. `WorkManager` was introduced to unify task scheduling on Android, using `JobScheduler`, `GcmNetworkManager`, and `AlarmManager` internally. `WorkManager` offers a lot of flexibility for scheduling, including periodically, one time, or constraint-based (e.g. only when the device is charging).(Citation: Android WorkManager)\\n\\nOn iOS, the `NSBackgroundActivityScheduler` API allows asynchronous tasks to be scheduled with the system. The tasks can be scheduled to be repeating or non-repeating, however, the system chooses when the tasks will be executed. The app can choose the interval for repeating tasks, or the delay between scheduling and execution for one-time tasks.(Citation: Apple NSBackgroundActivityScheduler)', 'kill_chain_phases': [{'kill_chain_name': 'mitre-mobile-attack', 'phase_name': 'execution'}, {'kill_chain_name': 'mitre-mobile-attack', 'phase_name': 'persistence'}], 'x_mitre_modified_by_ref': 'identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5', 'x_mitre_detection': 'Scheduling tasks/jobs can be difficult to detect, and therefore enterprises may be better served focusing on detection at other stages of adversarial behavior.', 'x_mitre_platforms': ['Android', 'iOS'], 'x_mitre_domains': ['mobile-attack'], 'x_mitre_version': '1.0', 'x_mitre_contributors': ['Lorin Wu, Trend Micro'], 'x_mitre_tactic_type': ['Post-Adversary Device Access'], 'type': 'attack-pattern', 'id': 'attack-pattern--00290ac5-551e-44aa-bbd8-c4b913488a6d', 'created': '2020-11-04T16:43:31.619Z', 'created_by_ref': 'identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5', 
'external_references': [{'source_name': 'mitre-attack', 'url': 'https://attack.mitre.org/techniques/T1603', 'external_id': 'T1603'}, {'source_name': 'Android WorkManager', 'description': 'Google. (n.d.). Schedule tasks with WorkManager. Retrieved November 4, 2020.', 'url': 'https://developer.android.com/topic/libraries/architecture/workmanager'}, {'source_name': 'Apple NSBackgroundActivityScheduler', 'description': 'Apple. (n.d.). NSBackgroundActivityScheduler. Retrieved November 4, 2020.', 'url': 'https://developer.apple.com/documentation/foundation/nsbackgroundactivityscheduler'}], 'object_marking_refs': ['marking-definition--fa42a846-8d90-4e51-bc29-71d5b4802168'], 'x_mitre_attack_spec_version': '2.1.0', 'x_mitre_is_subtechnique': False}\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import transformers\n",
    "import textwrap\n",
    "from transformers import LlamaTokenizer, LlamaForCausalLM\n",
    "import os\n",
    "import sys\n",
    "from typing import List\n",
    "\n",
    "from peft import (\n",
    "    LoraConfig,\n",
    "    get_peft_model,\n",
    "    get_peft_model_state_dict,\n",
    "    prepare_model_for_int8_training,\n",
    ")\n",
    "\n",
    "import fire\n",
    "import torch\n",
    "from datasets import load_dataset\n",
    "import pandas as pd\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib as mpl\n",
    "import seaborn as sns\n",
    "from pylab import rcParams\n",
    "\n",
    "# Plot defaults in one call: 10x7 figures at 100 dpi, white style, muted\n",
    "# palette, and fonts scaled by 1.2.\n",
    "sns.set(\n",
    "    style='white',\n",
    "    palette='muted',\n",
    "    font_scale=1.2,\n",
    "    rc={'figure.figsize': (10, 7), 'figure.dpi': 100},\n",
    ")\n",
    "\n",
    "# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.\n",
    "if torch.cuda.is_available():\n",
    "    DEVICE = \"cuda\"\n",
    "else:\n",
    "    DEVICE = \"cpu\"\n",
    "print(DEVICE)\n",
    "\n",
    "# Load one ATT&CK mobile attack-pattern JSON file from a local checkout.\n",
    "filename = \"cti-ATT-CK-v13.1/mobile-attack/attack-pattern/attack-pattern--00290ac5-551e-44aa-bbd8-c4b913488a6d.json\"\n",
    "# A context manager closes the file handle as soon as parsing finishes.\n",
    "with open(filename, encoding=\"utf-8\") as f:\n",
    "    data = json.load(f)\n",
    "print(data[\"objects\"][0])\n",
    "\n",
    "# Build instruction/input/output records from the loaded objects. The earlier\n",
    "# version of this comprehension iterated `df` and called\n",
    "# `sentiment_score_to_name`, neither of which exists yet on a fresh kernel, and\n",
    "# indexed `data[\"tweet\"]`, which looks copied from the tweet-sentiment cell.\n",
    "# NOTE(review): pairing each object's name (input) with its description\n",
    "# (output) under the \"What is\" prompt is an assumed intent - confirm.\n",
    "dataset_data = [\n",
    "    {\n",
    "        \"instruction\": \"What is\",\n",
    "        \"input\": attack_object[\"name\"],\n",
    "        \"output\": attack_object[\"description\"],\n",
    "    }\n",
    "    for attack_object in data[\"objects\"]\n",
    "    # Skip objects that lack either field instead of raising KeyError.\n",
    "    if \"name\" in attack_object and \"description\" in attack_object\n",
    "]\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e294c7d8-f4e1-4779-b9e6-d7cd95d5b5b9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cuda\n",
      "                             date   \n",
      "0  Fri Mar 23 00:40:40 +0000 2018  \\\n",
      "1  Fri Mar 23 00:40:40 +0000 2018   \n",
      "2  Fri Mar 23 00:40:42 +0000 2018   \n",
      "3  Fri Mar 23 00:41:04 +0000 2018   \n",
      "4  Fri Mar 23 00:41:07 +0000 2018   \n",
      "\n",
      "                                               tweet  sentiment  \n",
      "0  @p0nd3ea Bitcoin wasn't built to live on excha...        1.0  \n",
      "1  @historyinflicks Buddy if I had whatever serie...        1.0  \n",
      "2  @eatBCH @Bitcoin @signalapp @myWickr @Samsung ...        0.0  \n",
      "3  @aantonop Even if Bitcoin crash tomorrow morni...        0.0  \n",
      "4  I am experimenting whether I can live only wit...        1.0  \n",
      "{'instruction': 'Detect the sentiment of the tweet.', 'input': \"@p0nd3ea Bitcoin wasn't built to live on exchanges.\", 'output': 'Positive'}\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import transformers\n",
    "import textwrap\n",
    "from transformers import LlamaTokenizer, LlamaForCausalLM\n",
    "import os\n",
    "import sys\n",
    "from typing import List\n",
    "\n",
    "from peft import (\n",
    "    LoraConfig,\n",
    "    get_peft_model,\n",
    "    get_peft_model_state_dict,\n",
    "    prepare_model_for_int8_training,\n",
    ")\n",
    "\n",
    "import fire\n",
    "import torch\n",
    "from datasets import load_dataset\n",
    "import pandas as pd\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib as mpl\n",
    "import seaborn as sns\n",
    "from pylab import rcParams\n",
    "\n",
    "# Plot defaults in one call: 10x7 figures at 100 dpi, white style, muted\n",
    "# palette, and fonts scaled by 1.2.\n",
    "sns.set(\n",
    "    style='white',\n",
    "    palette='muted',\n",
    "    font_scale=1.2,\n",
    "    rc={'figure.figsize': (10, 7), 'figure.dpi': 100},\n",
    ")\n",
    "\n",
    "# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.\n",
    "if torch.cuda.is_available():\n",
    "    DEVICE = \"cuda\"\n",
    "else:\n",
    "    DEVICE = \"cpu\"\n",
    "print(DEVICE)\n",
    "\n",
    "# Read the tweet CSV and preview its first rows.\n",
    "df = pd.read_csv(\"bitcoin-sentiment-tweets.csv\")\n",
    "print(df.head())\n",
    "\n",
    "\n",
    "def sentiment_score_to_name(score: float):\n",
    "    \"\"\"Translate a numeric sentiment score into a label based on its sign.\n",
    "\n",
    "    Returns \"Negative\" for scores below zero, \"Positive\" for scores above\n",
    "    zero, and \"Neutral\" for anything else.\n",
    "    \"\"\"\n",
    "    if score < 0:\n",
    "        return \"Negative\"\n",
    "    return \"Positive\" if score > 0 else \"Neutral\"\n",
    "\n",
    "\n",
    "# One instruction/input/output record per CSV row: a fixed instruction, the\n",
    "# tweet text as input, and the sentiment label as output.\n",
    "dataset_data = list(\n",
    "    dict(\n",
    "        instruction=\"Detect the sentiment of the tweet.\",\n",
    "        input=row_dict[\"tweet\"],\n",
    "        output=sentiment_score_to_name(row_dict[\"sentiment\"]),\n",
    "    )\n",
    "    for row_dict in df.to_dict(orient=\"records\")\n",
    ")\n",
    "\n",
    "print(dataset_data[0])\n",
    "\n",
    "# Write all records to disk as a single JSON array.\n",
    "with open(\"alpaca-bitcoin-sentiment-dataset.json\", \"w\") as f:\n",
    "    json.dump(dataset_data, f)\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b1831b9f-37b5-4e73-ac4a-1e73226d2477",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cuda\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import transformers\n",
    "import textwrap\n",
    "from transformers import LlamaTokenizer, LlamaForCausalLM\n",
    "import os\n",
    "import sys\n",
    "from typing import List\n",
    "\n",
    "from peft import (\n",
    "    LoraConfig,\n",
    "    get_peft_model,\n",
    "    get_peft_model_state_dict,\n",
    "    prepare_model_for_int8_training,\n",
    ")\n",
    "\n",
    "import fire\n",
    "import torch\n",
    "from datasets import load_dataset\n",
    "import pandas as pd\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib as mpl\n",
    "import seaborn as sns\n",
    "from pylab import rcParams\n",
    "\n",
    "# Plot defaults in one call: 10x7 figures at 100 dpi, white style, muted\n",
    "# palette, and fonts scaled by 1.2.\n",
    "sns.set(\n",
    "    style='white',\n",
    "    palette='muted',\n",
    "    font_scale=1.2,\n",
    "    rc={'figure.figsize': (10, 7), 'figure.dpi': 100},\n",
    ")\n",
    "\n",
    "# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.\n",
    "if torch.cuda.is_available():\n",
    "    DEVICE = \"cuda\"\n",
    "else:\n",
    "    DEVICE = \"cpu\"\n",
    "print(DEVICE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "d789cc04-db22-4cdc-95e0-7e726659d446",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cuda\n"
     ]
    }
   ],
   "source": [
    "# Checkpoint identifier shared by the model and the tokenizer.\n",
    "BASE_MODEL = \"decapoda-research/llama-7b-hf\"\n",
    "\n",
    "# Load the causal LM with 8-bit loading enabled, float16 as the torch dtype,\n",
    "# and automatic device placement.\n",
    "model = LlamaForCausalLM.from_pretrained(BASE_MODEL, load_in_8bit=True, torch_dtype=torch.float16, device_map=\"auto\")\n",
    "\n",
    "tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)\n",
    "\n",
    "# unk. we want this to be different from the eos token\n",
    "tokenizer.pad_token_id = 0\n",
    "# Pad on the left side of each sequence.\n",
    "tokenizer.padding_side = \"left\"\n",
    "\n",
    "# Load the JSON records written earlier in this notebook and show the train split.\n",
    "data = load_dataset(\"json\", data_files=\"alpaca-bitcoin-sentiment-dataset.json\")\n",
    "print(data[\"train\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff9fea29-88ac-482e-96d5-543fc0f99b01",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}