failspy committed on
Commit
84f2b19
1 Parent(s): 93ebb1d

Upload ortho_cookbook

Files changed (1)
  1. ortho_cookbook.ipynb +1129 -0
ortho_cookbook.ipynb ADDED
@@ -0,0 +1,1129 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "82acAhWYGIPx"
7
+ },
8
+ "source": [
9
+ "# Demo of bypassing refusal"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "markdown",
14
+ "metadata": {
15
+ "id": "j7hOtw7UHXdD"
16
+ },
17
+ "source": [
18
+ "This notebook demonstrates our method for bypassing refusal, levaraging the insight that refusal is mediated by a 1-dimensional subspace. We recommend reading the [research post](https://www.lesswrong.com/posts/jGuXSZgv6qfdhMCuJ/refusal-in-llms-is-mediated-by-a-single-direction) for a more thorough explanation.\n",
19
+ "\n",
20
+ "Modified by FailSpy for easier usage"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "markdown",
25
+ "metadata": {
26
+ "id": "fcxHyDZw6b86"
27
+ },
28
+ "source": [
29
+ "## Setup"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": null,
35
+ "metadata": {
36
+ "id": "dLeei4-T6Wef"
37
+ },
38
+ "outputs": [],
39
+ "source": [
40
+ "%%capture\n",
41
+ "!pip install transformers transformers_stream_generator tiktoken transformer_lens einops jaxtyping colorama scikit-learn"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": null,
47
+ "metadata": {
48
+ "id": "_vhhwl-2-jPg"
49
+ },
50
+ "outputs": [],
51
+ "source": [
52
+ "import torch\n",
53
+ "import functools\n",
54
+ "import einops\n",
55
+ "import requests\n",
56
+ "import pandas as pd\n",
57
+ "import io\n",
58
+ "import textwrap\n",
59
+ "import gc\n",
60
+ "\n",
61
+ "from datasets import load_dataset\n",
62
+ "from sklearn.model_selection import train_test_split\n",
63
+ "from tqdm import tqdm\n",
64
+ "from torch import Tensor\n",
65
+ "from typing import List, Callable\n",
66
+ "from transformer_lens import HookedTransformer, utils\n",
67
+ "from transformer_lens.hook_points import HookPoint\n",
68
+ "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
69
+ "from jaxtyping import Float, Int\n",
70
+ "from colorama import Fore\n",
71
+ "\n",
72
+ "# We turn automatic differentiation off, to save GPU memory, as this notebook focuses on model inference not model training. (credit: Undi95)\n",
73
+ "torch.set_grad_enabled(False)"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "markdown",
78
+ "metadata": {},
79
+ "source": [
80
+ "### Load harmful/harmless prompts datasets, and chat template\n",
81
+ "Please note you may need to change the `CHAT_TEMPLATE` for whichever model you're using"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": null,
87
+ "metadata": {
88
+ "id": "Rth8yvLZJsXs"
89
+ },
90
+ "outputs": [],
91
+ "source": [
92
+ "def get_harmful_instructions():\n",
93
+ " url = 'https://raw.githubusercontent.com/llm-attacks/llm-attacks/main/data/advbench/harmful_behaviors.csv'\n",
94
+ " response = requests.get(url)\n",
95
+ " dataset = pd.read_csv(io.StringIO(response.content.decode('utf-8')))\n",
96
+ " instructions = dataset['goal'].tolist()\n",
97
+ " train, test = train_test_split(instructions, test_size=0.2, random_state=42)\n",
98
+ " return train, test\n",
99
+ "\n",
100
+ "def get_harmless_instructions():\n",
101
+ " hf_path = 'tatsu-lab/alpaca'\n",
102
+ " dataset = load_dataset(hf_path)\n",
103
+ " # filter for instructions that do not have inputs\n",
104
+ " instructions = []\n",
105
+ " for i in range(len(dataset['train'])):\n",
106
+ " if dataset['train'][i]['input'].strip() == '':\n",
107
+ " instructions.append(dataset['train'][i]['instruction'])\n",
108
+ " train, test = train_test_split(instructions, test_size=0.2, random_state=42)\n",
109
+ " return train, test\n",
110
+ "\n",
111
+ "harmful_inst_train, harmful_inst_test = get_harmful_instructions()\n",
112
+ "harmless_inst_train, harmless_inst_test = get_harmless_instructions()\n",
113
+ "\n",
114
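+ "# pick the template matching your model; newer tokenizers also expose tokenizer.apply_chat_template, which can build these strings for you\n",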
+ "CHAT_TEMPLATE = \"\"\"<|user|>\\n{instruction}<|end|>\\n<|assistant|>\"\"\" # phi-3 chat template\n",
115
+ "\n",
116
+ "CHAT_TEMPLATE = \"\"\"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\"\"\" # llama-3 chat template\n"
117
+ ]
118
+ },
119
+ {
120
+ "cell_type": "markdown",
121
+ "metadata": {
122
+ "id": "6ZOoJagxD49V"
123
+ },
124
+ "source": [
125
+ "### Load model (multi-GPU support)"
126
+ ]
127
+ },
128
+ {
129
+ "cell_type": "code",
130
+ "execution_count": null,
131
+ "metadata": {
132
+ "colab": {
133
+ "base_uri": "https://localhost:8080/",
134
+ "height": 191,
135
+ "referenced_widgets": [
136
+ "ad063e2c68a44f009bfab68c141c09be",
137
+ "89ee88168c474e9fbcf4a17f1483eff4",
138
+ "3877270cf4bc42a9b6142cce7a5d8c54",
139
+ "9a5611a341ed4673aaaf2f463f685d7c",
140
+ "a2de63dfbd6c485e841c6fcd1fefe451",
141
+ "c362d50107dd4a2db0d1a79da2af8d57",
142
+ "ffa85c694b694425999187b346c7ecfe",
143
+ "ec8f6f360a2243b0ac98d34e825ba378",
144
+ "f2ee188bfaa84e9680dbc296b1adbef6",
145
+ "e973493cd6d14381bb4ad2f82417e8a9",
146
+ "89797f6e82104058af92e3ceb094af66"
147
+ ]
148
+ },
149
+ "id": "Vnp65Vsg5x-5",
150
+ "outputId": "25fb5805-fe31-44b0-8f73-6fabc230d261"
151
+ },
152
+ "outputs": [],
153
+ "source": [
154
+ "MODEL_PATH = 'meta-llama/Meta-Llama-3-70B-Instruct'\n",
155
+ "\n",
156
+ "# little hack/tip:\n",
157
+ "# if you're dealing with a fine-tuned model of a \"supported\" model by transformer lens\n",
158
+ "# you can replicate the 'model path' of the supported model in your working directory\n",
159
+ "# e.g. rename the folder of 'dolphin-2.9-llama3-8b' to 'Meta-Llama-3-70B-Instruct', and put that into a folder called 'meta-llama'\n",
160
+ "# now transformers will accept 'meta-llama/Meta-Llama-3-70B-Instruct' as the model path for the model you're using, AND you don't have to add the model name to HookedTransformers\n",
161
+ "# make sure the model architecture and configs really do match though! \n",
162
+ "\n",
163
+ "model = HookedTransformer.from_pretrained_no_processing(\n",
164
+ " MODEL_PATH,\n",
165
+ " #local_files_only=True, # you can use local_files_only=True as a kwarg to from_pretrained_no_processing to enforce using the model from a local directory\n",
166
+ " dtype=torch.bfloat16, # you may want to try full precision if you can. bfloat16 is a good compromise though, but may not work in certain conditions or on certain hardware. DYOR\n",
167
+ " default_padding_side='left'\n",
168
+ ")\n",
169
+ "\n",
170
+ "\n",
171
+ "model.tokenizer.padding_side = 'left'\n",
172
+ "model.tokenizer.pad_token = model.tokenizer.eos_token"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "markdown",
177
+ "metadata": {
178
+ "id": "rF7e-u20EFTe"
179
+ },
180
+ "source": [
181
+ "#### Load model utility stuff"
182
+ ]
183
+ },
184
+ {
185
+ "cell_type": "code",
186
+ "execution_count": null,
187
+ "metadata": {},
188
+ "outputs": [],
189
+ "source": [
190
+ "def tokenize_instructions_chat(\n",
191
+ " tokenizer: AutoTokenizer,\n",
192
+ " instructions: List[str]\n",
193
+ ") -> Int[Tensor, 'batch_size seq_len']:\n",
194
+ " prompts = [CHAT_TEMPLATE.format(instruction=instruction) for instruction in instructions]\n",
195
+ " return tokenizer(prompts, padding=True, truncation=False, return_tensors=\"pt\").input_ids\n",
196
+ "\n",
197
+ "tokenize_instructions_fn = functools.partial(tokenize_instructions_chat, tokenizer=model.tokenizer)\n",
198
+ "def _generate_with_hooks(\n",
199
+ " model: HookedTransformer,\n",
200
+ " toks: Int[Tensor, 'batch_size seq_len'],\n",
201
+ " max_tokens_generated: int = 64,\n",
202
+ " fwd_hooks = [],\n",
203
+ ") -> List[str]:\n",
204
+ " all_toks = torch.zeros((toks.shape[0], toks.shape[1] + max_tokens_generated), dtype=torch.long, device=toks.device)\n",
205
+ " all_toks[:, :toks.shape[1]] = toks\n",
206
+ " for i in range(max_tokens_generated):\n",
207
+ " with model.hooks(fwd_hooks=fwd_hooks):\n",
208
+ " logits = model(all_toks[:, :-max_tokens_generated + i])\n",
209
+ " next_tokens = logits[:, -1, :].argmax(dim=-1) # greedy sampling (temperature=0)\n",
210
+ " all_toks[:,-max_tokens_generated+i] = next_tokens\n",
211
+ " return model.tokenizer.batch_decode(all_toks[:, toks.shape[1]:], skip_special_tokens=True)\n",
212
+ "\n",
213
+ "def get_generations(\n",
214
+ " model: HookedTransformer,\n",
215
+ " instructions: List[str],\n",
216
+ " tokenize_instructions_fn: Callable[[List[str]], Int[Tensor, 'batch_size seq_len']],\n",
217
+ " fwd_hooks = [],\n",
218
+ " max_tokens_generated: int = 64,\n",
219
+ " batch_size: int = 4,\n",
220
+ ") -> List[str]:\n",
221
+ " generations = []\n",
222
+ " for i in tqdm(range(0, len(instructions), batch_size)):\n",
223
+ " toks = tokenize_instructions_fn(instructions=instructions[i:i+batch_size])\n",
224
+ " generation = _generate_with_hooks(\n",
225
+ " model,\n",
226
+ " toks,\n",
227
+ " max_tokens_generated=max_tokens_generated,\n",
228
+ " fwd_hooks=fwd_hooks,\n",
229
+ " )\n",
230
+ " generations.extend(generation)\n",
231
+ " return generations\n"
232
+ ]
233
+ },
234
+ {
235
+ "cell_type": "markdown",
236
+ "metadata": {},
237
+ "source": [
238
+ "#### Useful cell for clearing RAM/VRAM"
239
+ ]
240
+ },
241
+ {
242
+ "cell_type": "code",
243
+ "execution_count": null,
244
+ "metadata": {},
245
+ "outputs": [],
246
+ "source": [
247
+ "# run this cell if you bump into memory issues\n",
248
+ "try:\n",
249
+ " del harmless_logits\n",
250
+ "except Exception:\n",
251
+ " pass\n",
252
+ "try:\n",
253
+ " del harmful_logits\n",
254
+ "except Exception:\n",
255
+ " pass\n",
256
+ "gc.collect(); torch.cuda.empty_cache()"
257
+ ]
258
+ },
259
+ {
260
+ "cell_type": "markdown",
261
+ "metadata": {
262
+ "id": "W9O8dm0_EQRk"
263
+ },
264
+ "source": [
265
+ "## Finding potential \"refusal directions\" (batched)"
266
+ ]
267
+ },
268
+ {
269
+ "cell_type": "code",
270
+ "execution_count": null,
271
+ "metadata": {
272
+ "vscode": {
273
+ "languageId": "julia"
274
+ }
275
+ },
276
+ "outputs": [],
277
+ "source": [
278
+ "harmful = {}\n",
279
+ "harmless = {}\n",
280
+ "\n",
281
+ "# may want to spare your RAM and cycles here. can use '32' here instead or something like the paper\n",
282
+ "N_INST_TRAIN = min(len(harmful_inst_train), len(harmless_inst_train)) \n",
283
+ "\n",
284
+ "# load the full training set here to align all the dimensions\n",
285
+ "toks = tokenize_instructions_fn(instructions=harmful_inst_train[:N_INST_TRAIN]+harmless_inst_train[:N_INST_TRAIN])\n",
286
+ "harmful_toks,harmless_toks = toks.split(N_INST_TRAIN)\n",
287
+ "\n",
288
+ "batch_size = 48 # adjust this based on available VRAM\n",
289
+ "\n",
290
+ "for i in tqdm(range(0, N_INST_TRAIN // batch_size + (N_INST_TRAIN % batch_size > 0))):\n",
291
+ " id = i*batch_size\n",
292
+ " e = min(N_INST_TRAIN,id+batch_size)\n",
293
+ "\n",
294
+ " # run the models on harmful and harmless prompts, cache their activations separately.\n",
295
+ " harmful_logits, harmful_cache = model.run_with_cache(harmful_toks[id:e], names_filter=lambda hook_name: 'resid' in hook_name, device='cpu', reset_hooks_end=True)\n",
296
+ " harmless_logits, harmless_cache = model.run_with_cache(harmless_toks[id:e], names_filter=lambda hook_name: 'resid' in hook_name, device='cpu', reset_hooks_end=True)\n",
297
+ " \n",
298
+ " for key in harmful_cache:\n",
299
+ " if key not in harmful:\n",
300
+ " harmful[key] = [harmful_cache[key]]\n",
301
+ " harmless[key] = [harmless_cache[key]]\n",
302
+ " else:\n",
303
+ " harmful[key].append(harmful_cache[key])\n",
304
+ " harmless[key].append(harmless_cache[key])\n",
305
+ "\n",
306
+ " # force Python & PyTorch to clear GPU and CPU RAM where possible\n",
307
+ " del harmful_logits, harmless_logits, harmful_cache, harmless_cache\n",
308
+ " gc.collect()\n",
309
+ " torch.cuda.empty_cache()\n",
310
+ "\n",
311
+ "harmful = {k:torch.cat(v) for k,v in harmful.items()}\n",
312
+ "harmless = {k:torch.cat(v) for k,v in harmless.items()}"
313
+ ]
314
+ },
315
+ {
316
+ "cell_type": "markdown",
317
+ "metadata": {},
318
+ "source": [
319
+ "### Compute activations into refusal directions"
320
+ ]
321
+ },
322
+ {
323
+ "cell_type": "code",
324
+ "execution_count": null,
325
+ "metadata": {
326
+ "id": "tqD5E8Vc_w5d"
327
+ },
328
+ "outputs": [],
329
+ "source": [
330
+ "# compute difference of means between harmful and harmless activations at intermediate layers\n",
331
+ "\n",
332
+ "def get_act_idx(cache_dict, act_name, layer):\n",
333
+ " key = (act_name, layer,)\n",
334
+ " return cache_dict[utils.get_act_name(*key)]\n",
335
+ "\n",
336
+ "activation_layers = ['resid_pre', 'resid_mid', 'resid_post']\n",
337
+ "\n",
338
+ "activation_refusals = {k:[] for k in activation_layers}\n",
339
+ "\n",
340
+ "for layer_num in range(1,model.cfg.n_layers):\n",
341
+ " pos = -1\n",
342
+ "\n",
343
+ " for layer in activation_layers:\n",
344
+ " harmful_mean_act = get_act_idx(harmful, layer, layer_num)[:, pos, :].mean(dim=0)\n",
345
+ " harmless_mean_act = get_act_idx(harmless, layer, layer_num)[:, pos, :].mean(dim=0)\n",
346
+ " \n",
347
+ " refusal_dir = harmful_mean_act - harmless_mean_act\n",
348
+ " refusal_dir = refusal_dir / refusal_dir.norm()\n",
349
+ " activation_refusals[layer].append(refusal_dir)\n",
350
+ "\n",
351
+ "# save to file so you don't have to re-build later\n",
352
+ "torch.save(activation_refusals, 'refusal_dirs.pth')\n",
353
+ "refusal_dirs = activation_refusals"
354
+ ]
355
+ },
356
+ {
357
+ "cell_type": "markdown",
358
+ "metadata": {},
359
+ "source": [
360
+ "### Alternatively, load a previous attempt's pth:"
361
+ ]
362
+ },
363
+ {
364
+ "cell_type": "code",
365
+ "execution_count": null,
366
+ "metadata": {},
367
+ "outputs": [],
368
+ "source": [
369
+ "activation_refusals = torch.load('refusal_dirs.pth')\n",
370
+ "refusal_dirs = activation_refusals"
371
+ ]
372
+ },
373
+ {
374
+ "cell_type": "markdown",
375
+ "metadata": {
376
+ "id": "2EoxY5i1CWe3"
377
+ },
378
+ "source": [
379
+ "## Ablate \"refusal direction\" via inference-time intervention\n",
380
+ "\n",
381
+ "Given a \"refusal direction\" $\\widehat{r} \\in \\mathbb{R}^{d_{\\text{model}}}$ with unit norm, we can ablate this direction from the model's activations $a_{l}$:\n",
382
+ "$${a}_{l}' \\leftarrow a_l - (a_l \\cdot \\widehat{r}) \\widehat{r}$$\n",
383
+ "\n",
384
+ "By performing this ablation on all intermediate activations, we enforce that the model can never express this direction (or \"feature\")."
385
+ ]
386
+ },
387
+ {
388
+ "cell_type": "markdown",
389
+ "metadata": {},
390
+ "source": [
391
+ "### \"Score\" layer activation diffs\n",
392
+ "This is a place with room for improvement in methodology. For now, I'm just doing a rudimentary sort based on difference distance average to find dirs with the most \"change\""
393
+ ]
394
+ },
395
+ {
396
+ "cell_type": "code",
397
+ "execution_count": null,
398
+ "metadata": {},
399
+ "outputs": [],
400
+ "source": [
401
+ "# Get all calculated potential refusal dirs, sort them in Descending order (reverse) based on their mean()\n",
402
+ "activation_layers = ['resid_pre', 'resid_mid', 'resid_post'] # you can use a subset of these if you don't think certain activations are promising\n",
403
+ "\n",
404
+ "activation_layers = ['resid_pre'] # this is usually good enough, though if you've got the compute to spare...\n",
405
+ "activation_scored = sorted([activation_refusals[layer][l-1] for l in range(1,model.cfg.n_layers) for layer in activation_layers], key = lambda x: abs(x.mean()), reverse=True)"
406
+ ]
407
+ },
408
+ {
409
+ "cell_type": "markdown",
410
+ "metadata": {},
411
+ "source": [
412
+ "#### Model ablation testing/brute-forcing the best refusal dir"
413
+ ]
414
+ },
415
+ {
416
+ "cell_type": "markdown",
417
+ "metadata": {},
418
+ "source": [
419
+ "##### Inference-time intervention hook:"
420
+ ]
421
+ },
422
+ {
423
+ "cell_type": "code",
424
+ "execution_count": null,
425
+ "metadata": {
426
+ "id": "26rf-yncB2PT"
427
+ },
428
+ "outputs": [],
429
+ "source": [
430
+ "def direction_ablation_hook(\n",
431
+ " activation: Float[Tensor, \"... d_act\"],\n",
432
+ " hook: HookPoint,\n",
433
+ " direction: Float[Tensor, \"d_act\"]\n",
434
+ "):\n",
435
+ " if activation.device != direction.device:\n",
436
+ " direction = direction.to(activation.device)\n",
437
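+ " # project the activation onto the refusal direction, then subtract that component\n",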
+ " proj = einops.einsum(activation, direction.view(-1, 1), '... d_act, d_act single -> ... single') * direction\n",
438
+ " return activation - proj"
439
+ ]
440
+ },
441
+ {
442
+ "cell_type": "markdown",
443
+ "metadata": {},
444
+ "source": [
445
+ "##### Testing baseline, can skip if you don't care (it will basically just be refusals with a regular model :P)"
446
+ ]
447
+ },
448
+ {
449
+ "cell_type": "code",
450
+ "execution_count": null,
451
+ "metadata": {},
452
+ "outputs": [],
453
+ "source": [
454
+ "N_INST_TEST = 12\n",
455
+ "baseline_generations = get_generations(model, harmful_inst_test[:N_INST_TEST], tokenize_instructions_fn, fwd_hooks=[])\n",
456
+ "for gen in baseline_generations:\n",
457
+ " print(gen)"
458
+ ]
459
+ },
460
+ {
461
+ "cell_type": "markdown",
462
+ "metadata": {},
463
+ "source": [
464
+ "##### Evaluating layers defined earlier (needs human evaluation to determine best layer for refusal inhibition)"
465
+ ]
466
+ },
467
+ {
468
+ "cell_type": "code",
469
+ "execution_count": null,
470
+ "metadata": {
471
+ "colab": {
472
+ "base_uri": "https://localhost:8080/"
473
+ },
474
+ "id": "sR1G5bXoEDty",
475
+ "outputId": "2e610278-3d4c-473e-850f-a9b28d0b13f9"
476
+ },
477
+ "outputs": [],
478
+ "source": [
479
+ "if \"N_INST_TEST\" not in locals() or not N_INST_TEST:\n",
480
+ " N_INST_TEST = 12 # you may want to evaluate more at the cost of additional compute time. by default, batches are size of 4, so I'd recommend making it a multiple of 4.\n",
481
+ "EVAL_N = 10 # Evaluate how many of the top N potential dirs\n",
482
+ "evals = []\n",
483
+ "for refusal_dir in tqdm(activation_scored[:EVAL_N]):\n",
484
+ " intervention_layers = list(range(model.cfg.n_layers)) # all layers\n",
485
+ "\n",
486
+ " hook_fn = functools.partial(direction_ablation_hook,direction=refusal_dir)\n",
487
+ " fwd_hooks = [(utils.get_act_name(act_name, l), hook_fn) for l in intervention_layers for act_name in ['resid_pre', 'resid_mid', 'resid_post']]\n",
488
+ "\n",
489
+ " intervention_generations = get_generations(model, harmful_inst_test[:N_INST_TEST], tokenize_instructions_fn, fwd_hooks=fwd_hooks)\n",
490
+ " evals.append(intervention_generations)\n",
491
+ " \n",
492
+ " #print(intervention_generations) # if you want to watch it as it goes"
493
+ ]
494
+ },
495
+ {
496
+ "cell_type": "markdown",
497
+ "metadata": {},
498
+ "source": [
499
+ "#### Present evals to clever pre-trained non-refusing human"
500
+ ]
501
+ },
502
+ {
503
+ "cell_type": "code",
504
+ "execution_count": null,
505
+ "metadata": {
506
+ "colab": {
507
+ "base_uri": "https://localhost:8080/"
508
+ },
509
+ "id": "pxbJr4vCFCOL",
510
+ "outputId": "47810f58-813e-4014-fee8-cb1406e318c8"
511
+ },
512
+ "outputs": [],
513
+ "source": [
514
+ "for instruction in range(N_INST_TEST):\n",
515
+ " if 'baseline_generations' in locals() and baseline_generations and len(baseline_generations) > instruction:\n",
516
+ " print(f\"INSTRUCTION {instruction}: {repr(harmful_inst_test[instruction])}\")\n",
517
+ " print(Fore.GREEN + f\"BASELINE COMPLETION:\")\n",
518
+ " print(textwrap.fill(repr(baseline_generations[instruction]), width=100, initial_indent='\\t', subsequent_indent='\\t'))\n",
519
+ " for layer_candidate in range(EVAL_N):\n",
520
+ " if len(evals) > layer_candidate and len(evals[layer_candidate]) > instruction:\n",
521
+ " print(Fore.RED + f\"LAYER CANDIDATE #{layer_candidate} INTERVENTION COMPLETION:\")\n",
522
+ " print(textwrap.fill(repr(evals[layer_candidate][instruction]), width=100, initial_indent='\\t', subsequent_indent='\\t'))\n",
523
+ " print(Fore.RESET)"
524
+ ]
525
+ },
526
+ {
527
+ "cell_type": "markdown",
528
+ "metadata": {
529
+ "id": "t9KooaWaCDc_"
530
+ },
531
+ "source": [
532
+ "## Orthogonalize weights w.r.t. \"refusal direction\"\n",
533
+ "\n",
534
+ "We can implement the intervention equivalently by directly orthogonalizing the weight matrices that write to the residual stream with respect to the refusal direction $\\widehat{r}$:\n",
535
+ "$$W_{\\text{out}}' \\leftarrow W_{\\text{out}} - \\widehat{r}\\widehat{r}^{\\mathsf{T}} W_{\\text{out}}$$\n",
536
+ "\n",
537
+ "By orthogonalizing these weight matrices, we enforce that the model is unable to write direction $r$ to the residual stream at all!\n",
538
+ "\n",
539
+ "This is basically how you finalize your layers' weights to represent your orthogonalization for a saved model"
540
+ ]
541
+ },
542
+ {
543
+ "cell_type": "markdown",
544
+ "metadata": {},
545
+ "source": [
546
+ "### Choose your fighter (favorite, ideally non-refusing layer)"
547
+ ]
548
+ },
549
+ {
550
+ "cell_type": "code",
551
+ "execution_count": null,
552
+ "metadata": {},
553
+ "outputs": [],
554
+ "source": [
555
+ "layer_candidate = 2 # e.g. you should choose based on the layer you think aligns to the behavior you like\n",
556
+ "refusal_dir = activation_scored[layer_candidate]"
557
+ ]
558
+ },
559
+ {
560
+ "cell_type": "markdown",
561
+ "metadata": {},
562
+ "source": [
563
+ "### Write ortho'd weights into model"
564
+ ]
565
+ },
566
+ {
567
+ "cell_type": "code",
568
+ "execution_count": null,
569
+ "metadata": {},
570
+ "outputs": [],
571
+ "source": [
572
+ "def get_orthogonalized_matrix(matrix: Float[Tensor, '... d_model'], vec: Float[Tensor, 'd_model']) -> Float[Tensor, '... d_model']:\n",
573
+ " proj = einops.einsum(matrix, vec.view(-1, 1), '... d_model, d_model single -> ... single') * vec\n",
574
+ " return matrix - proj"
575
+ ]
576
+ },
577
+ {
578
+ "cell_type": "code",
579
+ "execution_count": null,
580
+ "metadata": {
581
+ "id": "GC7cpMXZCG64"
582
+ },
583
+ "outputs": [],
584
+ "source": [
585
+ "if refusal_dir.device != model.W_E.device:\n",
586
+ " refusal_dir = refusal_dir.to(model.W_E.device)\n",
587
+ "model.W_E.data = get_orthogonalized_matrix(model.W_E, refusal_dir)\n",
588
+ "\n",
589
+ "for block in tqdm(model.blocks):\n",
590
+ " if refusal_dir.device != block.attn.W_O.device:\n",
591
+ " refusal_dir = refusal_dir.to(block.attn.W_O.device)\n",
592
+ " block.attn.W_O.data = get_orthogonalized_matrix(block.attn.W_O, refusal_dir)\n",
593
+ " block.mlp.W_out.data = get_orthogonalized_matrix(block.mlp.W_out, refusal_dir)\n",
594
+ "\n",
595
+ "# save your refusal_dir of choice separately to a file\n",
596
+ "torch.save(refusal_dir,\"ablation.pth\")"
597
+ ]
598
+ },
599
+ {
600
+ "cell_type": "markdown",
601
+ "metadata": {},
602
+ "source": [
603
+ "### Verify model weights are adjusted to match ablation (skippable)"
604
+ ]
605
+ },
606
+ {
607
+ "cell_type": "code",
608
+ "execution_count": null,
609
+ "metadata": {
610
+ "colab": {
611
+ "base_uri": "https://localhost:8080/"
612
+ },
613
+ "id": "1Y-qtouNGf3t",
614
+ "outputId": "5f946460-4ed4-4cf1-e53d-809ebb880f6e"
615
+ },
616
+ "outputs": [],
617
+ "source": [
618
+ "orthogonalized_generations = get_generations(model, harmful_inst_test[:N_INST_TEST], tokenize_instructions_fn, fwd_hooks=[])"
619
+ ]
620
+ },
621
+ {
622
+ "cell_type": "code",
623
+ "execution_count": null,
624
+ "metadata": {
625
+ "colab": {
626
+ "base_uri": "https://localhost:8080/"
627
+ },
628
+ "id": "r68O4_4DG3P7",
629
+ "outputId": "97eeb477-bfd1-4521-8c32-4657d99f3e0c"
630
+ },
631
+ "outputs": [],
632
+ "source": [
633
+ "for i in range(N_INST_TEST):\n",
634
+ " if 'baseline_generations' in locals() and baseline_generations and len(baseline_generations) > i:\n",
635
+ " print(f\"INSTRUCTION {i}: {repr(harmful_inst_test[i])}\")\n",
636
+ " print(Fore.GREEN + f\"BASELINE COMPLETION:\")\n",
637
+ " print(textwrap.fill(repr(baseline_generations[i]), width=100, initial_indent='\\t', subsequent_indent='\\t'))\n",
638
+ " print(Fore.RED + f\"INTERVENTION COMPLETION:\")\n",
639
+ " print(textwrap.fill(repr(evals[layer_candidate][i]), width=100, initial_indent='\\t', subsequent_indent='\\t'))\n",
640
+ " print(Fore.MAGENTA + f\"ORTHOGONALIZED COMPLETION:\")\n",
641
+ " print(textwrap.fill(repr(orthogonalized_generations[i]), width=100, initial_indent='\\t', subsequent_indent='\\t'))\n",
642
+ " print(Fore.RESET)"
643
+ ]
644
+ },
645
+ {
646
+ "cell_type": "markdown",
647
+ "metadata": {
648
+ "vscode": {
649
+ "languageId": "julia"
650
+ }
651
+ },
652
+ "source": [
653
+ "## Save your unruly model\n",
654
+ "This is where you'll need to consult with the original structure of the model you're generating. Below is converting Phi-3 and Llama-3 examples, but you'll need to do differently for others. Or if you just want a \"no thinking\" save, you can use the pytorch save below. Be aware that the structure output by that is not directly convertable however."
655
+ ]
656
+ },
657
+ {
658
+ "cell_type": "markdown",
659
+ "metadata": {},
660
+ "source": [
661
+ "### Simple PyTorch save! (easiest, but least portable)"
662
+ ]
663
+ },
664
+ {
665
+ "cell_type": "code",
666
+ "execution_count": null,
667
+ "metadata": {},
668
+ "outputs": [],
669
+ "source": [
670
+ "torch.save(model, \"pytorch_model.bin\") # can name it whatever you want, and then reload it"
671
+ ]
672
+ },
673
+ {
674
+ "cell_type": "markdown",
675
+ "metadata": {},
676
+ "source": [
677
+ "### Converting models back to HF safetensors (harder)\n",
678
+ "Do note that we only adjust a couple layers in get_orthogonalized_matrix(), so only need to copy 1 + (2*n_layers) over to the original trained model, not the whole lot.\n",
679
+ "\n",
680
+ "You can look to TransformerLens's source code `loading_from_pretrained.py` to see how the layers get converted in. e.g. https://github.com/neelnanda-io/TransformerLens/blob/main/transformer_lens/loading_from_pretrained.py#L1746-L1833 is `convert_llama_weights`, so you can just reverse the steps for the layers that you alter\n",
681
+ "\n",
682
+ "References to convert functions per model:\n",
683
+ "https://github.com/neelnanda-io/TransformerLens/blob/main/transformer_lens/loading_from_pretrained.py#L1475-L1504"
684
+ ]
685
+ },
686
+ {
687
+ "cell_type": "code",
688
+ "execution_count": null,
689
+ "metadata": {},
690
+ "outputs": [],
691
+ "source": [
692
+ "# this is probably useful for any conversion\n",
693
+ "cfg = model.cfg\n",
694
+ "\n",
695
+ "state_dict = model.state_dict()\n",
696
+ "\n",
697
+ "hf_model = AutoModelForCausalLM.from_pretrained(MODEL_PATH,torch_dtype=torch.bfloat16) # load the original model as a regular unhooked Transformer -- don't need to load it into GPU as it's just for saving\n",
698
+ "lm_model = hf_model.model"
699
+ ]
700
+ },
701
+ {
702
+ "cell_type": "markdown",
703
+ "metadata": {},
704
+ "source": [
705
+ "#### Llama-3 conversion"
706
+ ]
707
+ },
708
+ {
709
+ "cell_type": "code",
710
+ "execution_count": null,
711
+ "metadata": {
712
+ "id": "exUh3PEHRe9x"
713
+ },
714
+ "outputs": [],
715
+ "source": [
716
+ "lm_model.embed_tokens.weight = torch.nn.Parameter(state_dict[\"embed.W_E\"].cpu())\n",
717
+ "\n",
718
+ "for l in range(cfg.n_layers):\n",
719
+ " lm_model.layers[l].self_attn.o_proj.weight = torch.nn.Parameter(einops.rearrange(state_dict[f\"blocks.{l}.attn.W_O\"], \"n h m->m (n h)\", n=cfg.n_heads).contiguous())\n",
720
+ " lm_model.layers[l].mlp.down_proj.weight = torch.nn.Parameter(torch.transpose(state_dict[f\"blocks.{l}.mlp.W_out\"].contiguous(),0,1))\n"
721
+ ]
722
+ },
723
+ {
724
+ "cell_type": "markdown",
725
+ "metadata": {},
726
+ "source": [
727
+ "#### Phi-3 conversion"
728
+ ]
729
+ },
730
+ {
731
+ "cell_type": "code",
732
+ "execution_count": null,
733
+ "metadata": {},
734
+ "outputs": [],
735
+ "source": [
736
+ "\n",
737
+ "lm_model.embed_tokens.weight = state_dict[\"embed.W_E\"]\n",
738
+ "\n",
739
+ "for l in range(cfg.n_layers):\n",
740
+ "\n",
741
+ " W_O = einops.rearrange(\n",
742
+ " state_dict[f\"blocks.{l}.attn.W_O\"], \"n_head d_head d_model -> d_model (n_head d_head)\", n_head=cfg.n_heads\n",
743
+ " )\n",
744
+ " lm_model.layers[l].self_attn.o_proj.weight = torch.nn.Parameter(W_O.contiguous())\n",
745
+ "\n",
746
+ " lm_model.layers[l].mlp.down_proj.weight = torch.nn.Parameter(torch.transpose(state_dict[f\"blocks.{l}.mlp.W_out\"].cpu(), 0, 1).contiguous())\n"
747
+ ]
748
+ },
749
+ {
750
+ "cell_type": "markdown",
751
+ "metadata": {},
752
+ "source": [
753
+ "#### Save converted model"
754
+ ]
755
+ },
756
+ {
757
+ "cell_type": "code",
758
+ "execution_count": null,
759
+ "metadata": {},
760
+ "outputs": [],
761
+ "source": [
762
+ "hf_model.save(\"path/to/my_saved_based_model_dir/\")"
763
+ ]
764
+ }
765
+ ],
766
+ "metadata": {
767
+ "accelerator": "GPU",
768
+ "colab": {
769
+ "gpuType": "T4",
770
+ "provenance": [],
771
+ "toc_visible": true
772
+ },
773
+ "kernelspec": {
774
+ "display_name": "Python 3",
775
+ "name": "python3"
776
+ },
777
+ "language_info": {
778
+ "name": "python"
779
+ },
780
+ "widgets": {
781
+ "application/vnd.jupyter.widget-state+json": {
782
+ "3877270cf4bc42a9b6142cce7a5d8c54": {
783
+ "model_module": "@jupyter-widgets/controls",
784
+ "model_module_version": "1.5.0",
785
+ "model_name": "FloatProgressModel",
786
+ "state": {
787
+ "_dom_classes": [],
788
+ "_model_module": "@jupyter-widgets/controls",
789
+ "_model_module_version": "1.5.0",
790
+ "_model_name": "FloatProgressModel",
791
+ "_view_count": null,
792
+ "_view_module": "@jupyter-widgets/controls",
793
+ "_view_module_version": "1.5.0",
794
+ "_view_name": "ProgressView",
795
+ "bar_style": "success",
796
+ "description": "",
797
+ "description_tooltip": null,
798
+ "layout": "IPY_MODEL_ec8f6f360a2243b0ac98d34e825ba378",
799
+ "max": 2,
800
+ "min": 0,
801
+ "orientation": "horizontal",
802
+ "style": "IPY_MODEL_f2ee188bfaa84e9680dbc296b1adbef6",
803
+ "value": 2
804
+ }
805
+ },
806
+ "89797f6e82104058af92e3ceb094af66": {
807
+ "model_module": "@jupyter-widgets/controls",
808
+ "model_module_version": "1.5.0",
809
+ "model_name": "DescriptionStyleModel",
810
+ "state": {
811
+ "_model_module": "@jupyter-widgets/controls",
812
+ "_model_module_version": "1.5.0",
813
+ "_model_name": "DescriptionStyleModel",
814
+ "_view_count": null,
815
+ "_view_module": "@jupyter-widgets/base",
816
+ "_view_module_version": "1.2.0",
817
+ "_view_name": "StyleView",
818
+ "description_width": ""
819
+ }
820
+ },
821
+ "89ee88168c474e9fbcf4a17f1483eff4": {
822
+ "model_module": "@jupyter-widgets/controls",
823
+ "model_module_version": "1.5.0",
824
+ "model_name": "HTMLModel",
825
+ "state": {
826
+ "_dom_classes": [],
827
+ "_model_module": "@jupyter-widgets/controls",
828
+ "_model_module_version": "1.5.0",
829
+ "_model_name": "HTMLModel",
830
+ "_view_count": null,
831
+ "_view_module": "@jupyter-widgets/controls",
832
+ "_view_module_version": "1.5.0",
833
+ "_view_name": "HTMLView",
834
+ "description": "",
835
+ "description_tooltip": null,
836
+ "layout": "IPY_MODEL_c362d50107dd4a2db0d1a79da2af8d57",
837
+ "placeholder": "​",
838
+ "style": "IPY_MODEL_ffa85c694b694425999187b346c7ecfe",
839
+ "value": "Loading checkpoint shards: 100%"
840
+ }
841
+ },
842
+ "9a5611a341ed4673aaaf2f463f685d7c": {
843
+ "model_module": "@jupyter-widgets/controls",
844
+ "model_module_version": "1.5.0",
845
+ "model_name": "HTMLModel",
846
+ "state": {
847
+ "_dom_classes": [],
848
+ "_model_module": "@jupyter-widgets/controls",
849
+ "_model_module_version": "1.5.0",
850
+ "_model_name": "HTMLModel",
851
+ "_view_count": null,
852
+ "_view_module": "@jupyter-widgets/controls",
853
+ "_view_module_version": "1.5.0",
854
+ "_view_name": "HTMLView",
855
+ "description": "",
856
+ "description_tooltip": null,
857
+ "layout": "IPY_MODEL_e973493cd6d14381bb4ad2f82417e8a9",
858
+ "placeholder": "​",
859
+ "style": "IPY_MODEL_89797f6e82104058af92e3ceb094af66",
860
+ "value": " 2/2 [00:18&lt;00:00,  8.85s/it]"
861
+ }
862
+ },
863
+ "a2de63dfbd6c485e841c6fcd1fefe451": {
864
+ "model_module": "@jupyter-widgets/base",
865
+ "model_module_version": "1.2.0",
866
+ "model_name": "LayoutModel",
867
+ "state": {
868
+ "_model_module": "@jupyter-widgets/base",
869
+ "_model_module_version": "1.2.0",
870
+ "_model_name": "LayoutModel",
871
+ "_view_count": null,
872
+ "_view_module": "@jupyter-widgets/base",
873
+ "_view_module_version": "1.2.0",
874
+ "_view_name": "LayoutView",
875
+ "align_content": null,
876
+ "align_items": null,
877
+ "align_self": null,
878
+ "border": null,
879
+ "bottom": null,
880
+ "display": null,
881
+ "flex": null,
882
+ "flex_flow": null,
883
+ "grid_area": null,
884
+ "grid_auto_columns": null,
885
+ "grid_auto_flow": null,
886
+ "grid_auto_rows": null,
887
+ "grid_column": null,
888
+ "grid_gap": null,
889
+ "grid_row": null,
890
+ "grid_template_areas": null,
891
+ "grid_template_columns": null,
892
+ "grid_template_rows": null,
893
+ "height": null,
894
+ "justify_content": null,
895
+ "justify_items": null,
896
+ "left": null,
897
+ "margin": null,
898
+ "max_height": null,
899
+ "max_width": null,
900
+ "min_height": null,
901
+ "min_width": null,
902
+ "object_fit": null,
903
+ "object_position": null,
904
+ "order": null,
905
+ "overflow": null,
906
+ "overflow_x": null,
907
+ "overflow_y": null,
908
+ "padding": null,
909
+ "right": null,
910
+ "top": null,
911
+ "visibility": null,
912
+ "width": null
913
+ }
914
+ },
915
+ "ad063e2c68a44f009bfab68c141c09be": {
916
+ "model_module": "@jupyter-widgets/controls",
917
+ "model_module_version": "1.5.0",
918
+ "model_name": "HBoxModel",
919
+ "state": {
920
+ "_dom_classes": [],
921
+ "_model_module": "@jupyter-widgets/controls",
922
+ "_model_module_version": "1.5.0",
923
+ "_model_name": "HBoxModel",
924
+ "_view_count": null,
925
+ "_view_module": "@jupyter-widgets/controls",
926
+ "_view_module_version": "1.5.0",
927
+ "_view_name": "HBoxView",
928
+ "box_style": "",
929
+ "children": [
930
+ "IPY_MODEL_89ee88168c474e9fbcf4a17f1483eff4",
931
+ "IPY_MODEL_3877270cf4bc42a9b6142cce7a5d8c54",
932
+ "IPY_MODEL_9a5611a341ed4673aaaf2f463f685d7c"
933
+ ],
934
+ "layout": "IPY_MODEL_a2de63dfbd6c485e841c6fcd1fefe451"
935
+ }
936
+ },
937
+ "c362d50107dd4a2db0d1a79da2af8d57": {
938
+ "model_module": "@jupyter-widgets/base",
939
+ "model_module_version": "1.2.0",
940
+ "model_name": "LayoutModel",
941
+ "state": {
942
+ "_model_module": "@jupyter-widgets/base",
943
+ "_model_module_version": "1.2.0",
944
+ "_model_name": "LayoutModel",
945
+ "_view_count": null,
946
+ "_view_module": "@jupyter-widgets/base",
947
+ "_view_module_version": "1.2.0",
948
+ "_view_name": "LayoutView",
949
+ "align_content": null,
950
+ "align_items": null,
951
+ "align_self": null,
952
+ "border": null,
953
+ "bottom": null,
954
+ "display": null,
955
+ "flex": null,
956
+ "flex_flow": null,
957
+ "grid_area": null,
958
+ "grid_auto_columns": null,
959
+ "grid_auto_flow": null,
960
+ "grid_auto_rows": null,
961
+ "grid_column": null,
962
+ "grid_gap": null,
963
+ "grid_row": null,
964
+ "grid_template_areas": null,
965
+ "grid_template_columns": null,
966
+ "grid_template_rows": null,
967
+ "height": null,
968
+ "justify_content": null,
969
+ "justify_items": null,
970
+ "left": null,
971
+ "margin": null,
972
+ "max_height": null,
973
+ "max_width": null,
974
+ "min_height": null,
975
+ "min_width": null,
976
+ "object_fit": null,
977
+ "object_position": null,
978
+ "order": null,
979
+ "overflow": null,
980
+ "overflow_x": null,
981
+ "overflow_y": null,
982
+ "padding": null,
983
+ "right": null,
984
+ "top": null,
985
+ "visibility": null,
986
+ "width": null
987
+ }
988
+ },
989
+ "e973493cd6d14381bb4ad2f82417e8a9": {
990
+ "model_module": "@jupyter-widgets/base",
991
+ "model_module_version": "1.2.0",
992
+ "model_name": "LayoutModel",
993
+ "state": {
994
+ "_model_module": "@jupyter-widgets/base",
995
+ "_model_module_version": "1.2.0",
996
+ "_model_name": "LayoutModel",
997
+ "_view_count": null,
998
+ "_view_module": "@jupyter-widgets/base",
999
+ "_view_module_version": "1.2.0",
1000
+ "_view_name": "LayoutView",
1001
+ "align_content": null,
1002
+ "align_items": null,
1003
+ "align_self": null,
1004
+ "border": null,
1005
+ "bottom": null,
1006
+ "display": null,
1007
+ "flex": null,
1008
+ "flex_flow": null,
1009
+ "grid_area": null,
1010
+ "grid_auto_columns": null,
1011
+ "grid_auto_flow": null,
1012
+ "grid_auto_rows": null,
1013
+ "grid_column": null,
1014
+ "grid_gap": null,
1015
+ "grid_row": null,
1016
+ "grid_template_areas": null,
1017
+ "grid_template_columns": null,
1018
+ "grid_template_rows": null,
1019
+ "height": null,
1020
+ "justify_content": null,
1021
+ "justify_items": null,
1022
+ "left": null,
1023
+ "margin": null,
1024
+ "max_height": null,
1025
+ "max_width": null,
1026
+ "min_height": null,
1027
+ "min_width": null,
1028
+ "object_fit": null,
1029
+ "object_position": null,
1030
+ "order": null,
1031
+ "overflow": null,
1032
+ "overflow_x": null,
1033
+ "overflow_y": null,
1034
+ "padding": null,
1035
+ "right": null,
1036
+ "top": null,
1037
+ "visibility": null,
1038
+ "width": null
1039
+ }
1040
+ },
1041
+ "ec8f6f360a2243b0ac98d34e825ba378": {
1042
+ "model_module": "@jupyter-widgets/base",
1043
+ "model_module_version": "1.2.0",
1044
+ "model_name": "LayoutModel",
1045
+ "state": {
1046
+ "_model_module": "@jupyter-widgets/base",
1047
+ "_model_module_version": "1.2.0",
1048
+ "_model_name": "LayoutModel",
1049
+ "_view_count": null,
1050
+ "_view_module": "@jupyter-widgets/base",
1051
+ "_view_module_version": "1.2.0",
1052
+ "_view_name": "LayoutView",
1053
+ "align_content": null,
1054
+ "align_items": null,
1055
+ "align_self": null,
1056
+ "border": null,
1057
+ "bottom": null,
1058
+ "display": null,
1059
+ "flex": null,
1060
+ "flex_flow": null,
1061
+ "grid_area": null,
1062
+ "grid_auto_columns": null,
1063
+ "grid_auto_flow": null,
1064
+ "grid_auto_rows": null,
1065
+ "grid_column": null,
1066
+ "grid_gap": null,
1067
+ "grid_row": null,
1068
+ "grid_template_areas": null,
1069
+ "grid_template_columns": null,
1070
+ "grid_template_rows": null,
1071
+ "height": null,
1072
+ "justify_content": null,
1073
+ "justify_items": null,
1074
+ "left": null,
1075
+ "margin": null,
1076
+ "max_height": null,
1077
+ "max_width": null,
1078
+ "min_height": null,
1079
+ "min_width": null,
1080
+ "object_fit": null,
1081
+ "object_position": null,
1082
+ "order": null,
1083
+ "overflow": null,
1084
+ "overflow_x": null,
1085
+ "overflow_y": null,
1086
+ "padding": null,
1087
+ "right": null,
1088
+ "top": null,
1089
+ "visibility": null,
1090
+ "width": null
1091
+ }
1092
+ },
1093
+ "f2ee188bfaa84e9680dbc296b1adbef6": {
1094
+ "model_module": "@jupyter-widgets/controls",
1095
+ "model_module_version": "1.5.0",
1096
+ "model_name": "ProgressStyleModel",
1097
+ "state": {
1098
+ "_model_module": "@jupyter-widgets/controls",
1099
+ "_model_module_version": "1.5.0",
1100
+ "_model_name": "ProgressStyleModel",
1101
+ "_view_count": null,
1102
+ "_view_module": "@jupyter-widgets/base",
1103
+ "_view_module_version": "1.2.0",
1104
+ "_view_name": "StyleView",
1105
+ "bar_color": null,
1106
+ "description_width": ""
1107
+ }
1108
+ },
1109
+ "ffa85c694b694425999187b346c7ecfe": {
1110
+ "model_module": "@jupyter-widgets/controls",
1111
+ "model_module_version": "1.5.0",
1112
+ "model_name": "DescriptionStyleModel",
1113
+ "state": {
1114
+ "_model_module": "@jupyter-widgets/controls",
1115
+ "_model_module_version": "1.5.0",
1116
+ "_model_name": "DescriptionStyleModel",
1117
+ "_view_count": null,
1118
+ "_view_module": "@jupyter-widgets/base",
1119
+ "_view_module_version": "1.2.0",
1120
+ "_view_name": "StyleView",
1121
+ "description_width": ""
1122
+ }
1123
+ }
1124
+ }
1125
+ }
1126
+ },
1127
+ "nbformat": 4,
1128
+ "nbformat_minor": 0
1129
+ }