<div class="fa exercise"/>')
+ .prependTo(cell.element)
+ .on('click', click_solution_lock);
+ element_set_locked(cell, locked);
+ return ctrl;
+ }
+
+ function remove_element(cell) {
+ cell.element.find('.exercise').remove();
+ }
+
+ function element_set_locked(cell, locked) {
+ return cell.element.find('.exercise')
+ .toggleClass('fa-plus-square-o', locked)
+ .toggleClass('fa-minus-square-o', !locked);
+ }
+
+ function refresh_exercises() {
+ var in_exercise = false;
+ IPython.notebook.get_cells().forEach(function(cell) {
+ if (in_exercise && cell.metadata.solution !== undefined && !cell.metadata.solution_first) {
+ cell.element.toggleClass('hidden', cell.metadata.solution === 'hidden');
+ } else {
+ in_exercise = false;
+ }
+ if (!in_exercise && cell.metadata.solution !== undefined) {
+ in_exercise = true;
+ add_element(cell);
+ }
+ });
+ }
+
+ function load_ipython_extension() {
+ // add css
+        $('<link rel="stylesheet" type="text/css"/>')
+ .attr('href', requirejs.toUrl('./main.css'))
+ .appendTo('head');
+
+ // Hide/display existing solutions at startup
+ events.on('notebook_loaded.Notebook', refresh_exercises);
+ if (IPython.notebook._fully_loaded) refresh_exercises();
+
+ var action_name = IPython.keyboard_manager.actions.register({
+ help : 'Exercise: Create/Remove exercise',
+ help_index: 'ht',
+ icon : 'fa-mortar-board',
+ handler : create_remove_exercise
+ }, 'create_remove_exercise', 'exercise');
+
+ IPython.notebook.config.loaded.then(function() {
+            $.extend(true, cfg, IPython.notebook.config.data.exercise);
+
+ if (cfg.add_button) {
+ IPython.toolbar.add_buttons_group([action_name]);
+ }
+ if (cfg.use_hotkey && cfg.hotkey) {
+ var cmd_shrts = {};
+ cmd_shrts[cfg.hotkey] = action_name;
+ IPython.keyboard_manager.command_shortcuts.add_shortcuts(cmd_shrts);
+ }
+ }).catch(function(err) {
+ console.warn('[exercise] error:', err);
+ });
+ }
+
+ return {
+ load_ipython_extension: load_ipython_extension,
+ };
+});
diff --git a/.local/share/jupyter/nbextensions/exercise2/icon.png b/.local/share/jupyter/nbextensions/exercise2/icon.png
new file mode 100644
index 0000000000000000000000000000000000000000..c06473a6c84fb51c49711580ab5e5fbb26999492
Binary files /dev/null and b/.local/share/jupyter/nbextensions/exercise2/icon.png differ
diff --git a/.local/share/jupyter/nbextensions/exercise2/image.gif b/.local/share/jupyter/nbextensions/exercise2/image.gif
new file mode 100644
index 0000000000000000000000000000000000000000..0fe22308b600e50d9f51ebe6a7164475ddb81c4e
Binary files /dev/null and b/.local/share/jupyter/nbextensions/exercise2/image.gif differ
diff --git a/.local/share/jupyter/nbextensions/exercise2/main.css b/.local/share/jupyter/nbextensions/exercise2/main.css
new file mode 100644
index 0000000000000000000000000000000000000000..f7ea2c299c7f9f18ea29ea2b5323261a6c450d8e
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/exercise2/main.css
@@ -0,0 +1,60 @@
+.exercise2 {
+ display: flex;
+ width: 100%;
+ flex-direction: row;
+ align-content: flex-end;
+}
+
+.onoffswitch {
+ display: inline;
+ position: relative; width: 167px;
+ margin-top:8px;
+ -webkit-user-select:none; -moz-user-select:none; -ms-user-select: none;
+}
+.onoffswitch-checkbox {
+ display: none;
+}
+.onoffswitch-label {
+ display: block; overflow: hidden; cursor: pointer;
+ border: 2px solid #999999; border-radius: 20px;
+ margin:0;
+}
+.onoffswitch-inner {
+ display: block; width: 200%; margin-left: -100%;
+ transition: margin 0.3s ease-in 0s;
+}
+.onoffswitch-inner:before, .onoffswitch-inner:after {
+ display: block; float: left; width: 50%; height: 30px; padding: 0; line-height: 30px;
+ font-size: 15px; color: white; font-family: Trebuchet, Arial, sans-serif; font-weight: bold;
+ box-sizing: border-box;
+}
+.onoffswitch-inner:before {
+ content: "Hide Solution";
+ padding-left: 10px;
+ background-color: #34A7C1; color: #FFFFFF;
+ }
+
+.onoffswitch-inner:after {
+ content: "Show Solution";
+ padding-right: 10px;
+ background-color: #73FA7E; color: #999999;
+ text-align: right;
+}
+.onoffswitch-switch {
+ display: block; width: 14px; margin: 6px;
+ padding-top: 0px;
+ background: #FFFFFF;
+ position: absolute; top: 0; bottom: 0;
+ text-align: center;
+ vertical-align: middle;
+ right: 133px;
+ border: 2px solid #999999; border-radius: 20px;
+ transition: all 0.25s ease-in 0s;
+}
+.onoffswitch-checkbox:checked + .onoffswitch-label .onoffswitch-inner {
+ margin-left: 0;
+}
+.onoffswitch-checkbox:checked + .onoffswitch-label .onoffswitch-switch {
+ right: 0px;
+}
+
diff --git a/.local/share/jupyter/nbextensions/exercise2/main.js b/.local/share/jupyter/nbextensions/exercise2/main.js
new file mode 100644
index 0000000000000000000000000000000000000000..f83609104d7ac7ccbadd5be78bf98324f494cdb4
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/exercise2/main.js
@@ -0,0 +1,169 @@
+// Copyright (c) IPython-Contrib Team.
+// Distributed under the terms of the Modified BSD License.
+
+// Hide or display solutions in a notebook
+
+// dec 6, 2017 @jcb91: use bootstrap 'hidden' class to play nicely with collapsible_headings
+// december 30, 2015: update to notebook 4.1.x
+// updated on december 22, 2015 to allow consecutive exercises
+// exercise2: built by @jfbercher from an earlier work by @junasch (october 2015) - see readme.md
+
+define([
+ 'base/js/namespace',
+ 'jquery',
+ 'require',
+ 'base/js/events',
+], function(IPython, $, requirejs, events) {
+ "use strict";
+
+ var cfg = {
+ add_button: true,
+ use_hotkey: true,
+ hotkey: 'Alt-D',
+ };
+
+ /**
+ * handle click event
+ *
+ * @method click_solution_lock
+ * @param evt {Event} jquery event
+ */
+ function click_solution_lock(evt) {
+ var cell = IPython.notebook.get_selected_cell();
+ var is_locked = cell.metadata.solution2 === 'hidden';
+ cell.metadata.solution2 = is_locked ? 'shown' : 'hidden';
+ element_set_locked(cell, !is_locked);
+ cell = IPython.notebook.get_next_cell(cell);
+ while (cell !== null && cell.metadata.solution2 !== undefined && !cell.metadata.solution2_first) {
+ cell.element.toggleClass('hidden', !is_locked);
+ cell.metadata.solution2 = is_locked ? 'shown' : 'hidden';
+ cell = IPython.notebook.get_next_cell(cell);
+ }
+ }
+
+ /**
+ * Create or Remove an exercise in selected cells
+ *
+ * @method create_remove_exercise
+ *
+ */
+ function create_remove_exercise() {
+ var lcells = IPython.notebook.get_selected_cells();
+ // It is possible that no cell is selected
+ if (lcells.length < 1) {
+ alert("Exercise extension: \nPlease select some cells...");
+ return;
+ }
+
+ var cell = lcells[0];
+ if (cell.metadata.solution2_first) {
+ remove_element(cell);
+ delete cell.metadata.solution2_first;
+ while (cell !== null && cell.metadata.solution2 !== undefined && !cell.metadata.solution2_first) {
+ delete cell.metadata.solution2;
+ cell.element.removeClass('hidden');
+ cell = IPython.notebook.get_next_cell(cell);
+ }
+ }
+ else {
+ cell.metadata.solution2_first = true;
+ cell.metadata.solution2 = 'hidden';
+ add_element(cell);
+ for (var k = 1; k < lcells.length; k++) {
+ cell = lcells[k];
+ cell.element.addClass('hidden');
+ cell.metadata.solution2 = 'hidden';
+ }
+ }
+ }
+
+ /**
+ * Add a lock control to the given cell
+ */
+ var cbx = 0;
+ function add_element(cell) {
+ var ctrl = cell.element.find('.exercise');
+ if (ctrl.length > 0) return ctrl;
+ var locked = cell.metadata.solution2 === 'hidden';
+ cell.element.css('flex-wrap', 'wrap');
+ cbx += 1;
+        ctrl = $([
+            '<div class="exercise exercise2">',
+            '<div class="onoffswitch">',
+            '<input type="checkbox" class="onoffswitch-checkbox" id="onoffswitch' + cbx + '" checked>',
+            '<label class="onoffswitch-label" for="onoffswitch' + cbx + '">',
+            '<span class="onoffswitch-inner"></span>',
+            '<span class="onoffswitch-switch"></span>',
+            '</label>',
+            '&nbsp;',
+            '</div>',
+            '</div>'
+        ].join('\n'))
+            .appendTo(cell.element);
+ ctrl.find('input')
+ .on('click', click_solution_lock);
+ element_set_locked(cell, locked);
+ return ctrl;
+ }
+
+ function remove_element(cell) {
+ cell.element.find('.exercise').remove();
+ }
+
+ function element_set_locked(cell, locked) {
+        return cell.element.find('.exercise input')
+            .prop('checked', !locked);
+ }
+
+ function refresh_exercises() {
+ var in_exercise = false;
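+        // Scan cells in order: a cell carrying solution2 metadata starts an exercise
+        // and gets the toggle control; the following solution2 cells (up to the next
+        // one flagged solution2_first) are shown or hidden per their saved state.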
+ IPython.notebook.get_cells().forEach(function(cell) {
+ if (in_exercise && cell.metadata.solution2 !== undefined && !cell.metadata.solution2_first) {
+ cell.element.toggleClass('hidden', cell.metadata.solution2 === 'hidden');
+ } else {
+ in_exercise = false;
+ }
+ if (!in_exercise && cell.metadata.solution2 !== undefined) {
+ in_exercise = true;
+ add_element(cell);
+ }
+ });
+ }
+
+ function load_ipython_extension() {
+ // add css
+        $('<link rel="stylesheet" type="text/css"/>')
+ .attr('href', requirejs.toUrl('./main.css'))
+ .appendTo('head');
+
+ // Hide/display existing solutions at startup
+ events.on('notebook_loaded.Notebook', refresh_exercises);
+ if (IPython.notebook._fully_loaded) refresh_exercises();
+
+ var action_name = IPython.keyboard_manager.actions.register({
+ help : 'Exercise2: Create/Remove exercise',
+ help_index: 'ht',
+ icon : 'fa-toggle-on',
+ handler : create_remove_exercise,
+ }, 'create_remove_exercise', 'exercise2');
+
+ return IPython.notebook.config.loaded.then(function() {
+ $.extend(true, cfg, IPython.notebook.config.data.exercise2);
+
+ if (cfg.add_button) {
+ IPython.toolbar.add_buttons_group([action_name]);
+ }
+ if (cfg.use_hotkey && cfg.hotkey) {
+ var cmd_shrts = {};
+ cmd_shrts[cfg.hotkey] = action_name;
+ IPython.keyboard_manager.command_shortcuts.add_shortcuts(cmd_shrts);
+ }
+ }).catch(function(err) {
+ console.warn('[exercise2] error:', err);
+ });
+ }
+
+ return {
+ load_ipython_extension: load_ipython_extension,
+ };
+});
diff --git a/.local/share/jupyter/nbextensions/export_embedded/export_embedded.yaml b/.local/share/jupyter/nbextensions/export_embedded/export_embedded.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d27c1c0d26d01958b581dcb5e863600c072970b4
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/export_embedded/export_embedded.yaml
@@ -0,0 +1,7 @@
+Type: Jupyter Notebook Extension
+Compatibility: 5.x
+Main: main.js
+Name: Export Embedded HTML
+Description: Export to HTML with images embedded
+Icon: icon.png
+Link: readme.md
diff --git a/.local/share/jupyter/nbextensions/export_embedded/main.js b/.local/share/jupyter/nbextensions/export_embedded/main.js
new file mode 100644
index 0000000000000000000000000000000000000000..12211b36f94b1069ae049b11d24a54dad237adf6
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/export_embedded/main.js
@@ -0,0 +1,56 @@
+// toggle display of all code cells' inputs
+
+define([
+ 'jquery',
+ 'base/js/namespace',
+ 'base/js/events'
+], function(
+ $,
+ Jupyter,
+ events
+) {
+ "use strict";
+
+ function initialize () {
+ }
+
+ var load_ipython_extension = function() {
+
+ var v = Jupyter.version.split(".")
+ if(Number(v[0])*10+ Number(v[1]) < 51)
+ {
+ console.log('Notebook version 5.1.0 or higher required for this extension')
+ return
+ }
+
+ /* Add an entry in the download menu */
+ var dwm = $("#download_menu")
+        var downloadEntry = $('<li id="download_html_embed"><a href="#">HTML Embedded (.html)</a></li>')
+ dwm.append(downloadEntry)
+ downloadEntry.click(function () {
+ Jupyter.menubar._nbconvert('html_embed', true);
+ });
+
+ /* Add also a Button, currently disabled */
+ /*
+ Jupyter.toolbar.add_buttons_group([
+ Jupyter.keyboard_manager.actions.register ({
+ help : 'Embedded HTML Export',
+ icon : 'fa-save',
+ handler: function() {
+ Jupyter.menubar._nbconvert('html_embed', true);
+ }
+ }, 'export-embedded-html', 'export_embedded')
+ ]);
+ */
+ if (Jupyter.notebook !== undefined && Jupyter.notebook._fully_loaded) {
+ // notebook_loaded.Notebook event has already happened
+ initialize();
+ }
+ events.on('notebook_loaded.Notebook', initialize);
+ };
+
+ return {
+ load_ipython_extension : load_ipython_extension
+ };
+});
diff --git a/.local/share/jupyter/nbextensions/export_embedded/readme.md b/.local/share/jupyter/nbextensions/export_embedded/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..3922d9e9beaa8e42b153eb4c8b30f411b8ecb9cc
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/export_embedded/readme.md
@@ -0,0 +1,7 @@
+Export HTML With Embedded Images
+================================
+This extension adds a download option under File -> Download -> HTML Embedded, which exports the notebook to a single HTML file with images embedded (works like: `jupyter nbconvert --to html_embed notebook.ipynb`)
+
+**Note**: So far, this extension can only resolve relative image paths in markdown cells (e.g. `![](graphics/pic.png)`) when jupyter is started in the folder (working directory) from which those relative paths can be resolved!
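+
+For programmatic use, a rough Python sketch of what the menu action does (a sketch only; it assumes `jupyter_contrib_nbextensions` is installed so the `html_embed` exporter is registered with nbconvert, and the filename is a placeholder):
+
+    from nbconvert import get_exporter
+
+    # look up the exporter registered under the name the menu entry uses
+    exporter = get_exporter('html_embed')()
+    body, _resources = exporter.from_filename('notebook.ipynb')
+    with open('notebook.html', 'w') as f:
+        f.write(body)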
+
+
diff --git a/.local/share/jupyter/nbextensions/freeze/config.yaml b/.local/share/jupyter/nbextensions/freeze/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..035ed83519b4ce112d40b066983116c0e22c995a
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/freeze/config.yaml
@@ -0,0 +1,20 @@
+Type: IPython Notebook Extension
+Name: Freeze
+Description: Freeze cells (forbid editing and executing) or make them read-only
+Link: readme.md
+Icon: icon.png
+Main: main.js
+Compatibility: 4.x, 5.x
+Parameters:
+- name: Freeze.readonly_color
+ description: |
+ Color to use for read-only cell
+ default: '#fffef0'
+ input_type: color
+
+- name: Freeze.frozen_color
+ description: |
+ Color to use for frozen cell
+ default: '#f0feff'
+ input_type: color
+
diff --git a/.local/share/jupyter/nbextensions/freeze/icon.png b/.local/share/jupyter/nbextensions/freeze/icon.png
new file mode 100644
index 0000000000000000000000000000000000000000..d5f88f6a4d8938c72da3ecbded5419b517ad4807
Binary files /dev/null and b/.local/share/jupyter/nbextensions/freeze/icon.png differ
diff --git a/.local/share/jupyter/nbextensions/freeze/main.js b/.local/share/jupyter/nbextensions/freeze/main.js
new file mode 100644
index 0000000000000000000000000000000000000000..ff5361833e9c47cbf771addef2743e39bad7acd3
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/freeze/main.js
@@ -0,0 +1,205 @@
+define([
+ 'base/js/namespace',
+ 'base/js/events',
+ 'notebook/js/codecell',
+ 'notebook/js/textcell',
+ 'jquery'
+], function (
+ Jupyter,
+ events,
+ codecell,
+ textcell,
+ $
+) {
+ 'use strict';
+
+ var CodeCell = codecell.CodeCell;
+ var MarkdownCell = textcell.MarkdownCell;
+
+ var mod_name = 'Freeze';
+ var log_prefix = '[' + mod_name + ']';
+
+ // defaults, overridden by server's config
+ var options = {
+ readonly_color: '#fffef0',
+ frozen_color: '#f0feff'
+ };
+
+ function patch_MarkdownCell_unrender () {
+ console.log('[Freeze] patching MarkdownCell.prototype.unrender');
+ var old_unrender = MarkdownCell.prototype.unrender;
+
+ MarkdownCell.prototype.unrender = function () {
+ // console.log('[Freeze] patched unrender applied');
+ if (this.metadata.run_control === undefined ||
+ !this.metadata.run_control.frozen
+ ) {
+ old_unrender.apply(this, arguments);
+ }
+ };
+ }
+
+ function patch_CodeCell_execute () {
+ console.log('[Freeze] patching CodeCell.prototype.execute');
+ var old_execute = CodeCell.prototype.execute;
+
+ CodeCell.prototype.execute = function () {
+ if (this.metadata.run_control === undefined ||
+ !this.metadata.run_control.frozen
+ ) {
+ old_execute.apply(this, arguments);
+ }
+ };
+ }
+
+ // Migrate old metadata format to new notebook-defined metadata.editable
+ function migrate_state (cell) {
+ if (cell.metadata.run_control !== undefined) {
+ if (cell instanceof CodeCell || cell instanceof MarkdownCell) {
+ if (cell.metadata.run_control.read_only === true) {
+ cell.metadata.editable = false;
+ }
+ }
+ else {
+ // remove metadata irrelevant to non-code/markdown cells
+ delete cell.metadata.run_control.frozen;
+ }
+ // remove old key replaced by metadata.editable
+ delete cell.metadata.run_control.read_only;
+ // remove whole object if it's now empty
+ if (Object.keys(cell.metadata.run_control).length === 0) {
+ delete cell.metadata.run_control;
+ }
+ }
+ }
+
+ function get_state (cell) {
+ if (cell.metadata.editable === false && (cell instanceof CodeCell || cell instanceof MarkdownCell)) {
+ if (cell.metadata.run_control !== undefined && cell.metadata.run_control.frozen) {
+ return 'frozen';
+ }
+ return 'readonly';
+ }
+ return 'normal';
+ }
+
+ function set_state(cell, state) {
+ if (!(cell instanceof CodeCell || cell instanceof MarkdownCell)) {
+ return;
+ }
+
+ state = state || 'normal';
+ var bg;
+ switch (state) {
+ case 'normal':
+ cell.metadata.editable = true;
+ cell.metadata.deletable = true;
+ if (cell.metadata.run_control !== undefined) {
+ delete cell.metadata.run_control.frozen;
+ }
+ bg = "";
+ break;
+ case 'read_only':
+ case 'readonly':
+ cell.metadata.editable = false;
+ cell.metadata.deletable = false;
+ if (cell.metadata.run_control !== undefined) {
+ delete cell.metadata.run_control.frozen;
+ }
+ bg = options.readonly_color;
+ break;
+ case 'frozen':
+ cell.metadata.editable = false;
+ cell.metadata.deletable = false;
+ $.extend(true, cell.metadata, {run_control: {frozen: true}});
+ bg = options.frozen_color;
+ break;
+ }
+ // remove whole object if it's now empty
+ if (cell.metadata.run_control !== undefined && Object.keys(cell.metadata.run_control).length === 0) {
+ delete cell.metadata.run_control;
+ }
+ cell.code_mirror.setOption('readOnly', !cell.metadata.editable);
+ var prompt = cell.element.find('div.input_area');
+ prompt.css("background-color", bg);
+ }
+
+ function set_state_selected (state) {
+ var cells = Jupyter.notebook.get_selected_cells();
+ for (var i = 0; i < cells.length; i++) {
+ set_state(cells[i], state);
+ }
+ }
+
+ function button_callback(state) {
+ set_state_selected(state);
+ var dirty_state = {value: true};
+ events.trigger("set_dirty.Notebook", dirty_state);
+ }
+
+ function make_normal_selected () {
+ button_callback('normal');
+ }
+
+ function make_read_only_selected () {
+ button_callback('read_only');
+ }
+
+ function make_frozen_selected () {
+ button_callback('frozen');
+ }
+
+ function initialize_states () {
+ var cells = Jupyter.notebook.get_cells();
+ for (var i = 0; i < cells.length; i++) {
+ var cell = cells[i];
+ migrate_state(cell);
+ var state = get_state(cell);
+ set_state(cell, state);
+ }
+ }
+
+ function load_extension () {
+ Jupyter.toolbar.add_buttons_group([
+ Jupyter.keyboard_manager.actions.register ({
+ help : 'lift restrictions from selected cells',
+ icon : 'fa-unlock-alt',
+ handler : make_normal_selected
+ }, 'make-cells-normal', mod_name),
+ Jupyter.keyboard_manager.actions.register({
+ help : 'make selected cells read-only',
+ icon: 'fa-lock',
+ handler : make_read_only_selected
+ }, 'make-cells-read-only', mod_name),
+ Jupyter.keyboard_manager.actions.register({
+ help : 'freeze selected cells',
+ icon : 'fa-asterisk',
+ handler : make_frozen_selected
+ }, 'freeze-cells', mod_name)
+ ]);
+
+ patch_CodeCell_execute();
+ patch_MarkdownCell_unrender();
+
+ Jupyter.notebook.config.loaded.then(function on_config_loaded () {
+ $.extend(true, options, Jupyter.notebook.config.data[mod_name]);
+ }, function on_config_load_error (reason) {
+ console.warn(log_prefix, 'Using defaults after error loading config:', reason);
+ }).then(function do_stuff_with_config () {
+ events.on("notebook_loaded.Notebook", initialize_states);
+ if (Jupyter.notebook !== undefined && Jupyter.notebook._fully_loaded) {
+ // notebook already loaded, so we missed the event, so update all
+ initialize_states();
+ }
+ }).catch(function on_error (reason) {
+ console.error(log_prefix, 'Error:', reason);
+ });
+ }
+
+ return {
+ get_state : get_state,
+ set_state : set_state,
+ load_jupyter_extension : load_extension,
+ load_ipython_extension : load_extension
+ };
+});
diff --git a/.local/share/jupyter/nbextensions/freeze/readme.md b/.local/share/jupyter/nbextensions/freeze/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..f7d6bb5335d2e01439959712f58c5f3c95644a03
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/freeze/readme.md
@@ -0,0 +1,24 @@
+# Freeze
+
+This extension allows you to make cells read-only or frozen. It provides three buttons:
+* unlock
+* read-only
+* frozen
+
+
+For **code cells**:
+_read-only_: the cell can be executed, but its input cannot be changed.
+_frozen_: the cell can neither be edited nor executed.
+
+For **markdown cells**:
+_read-only_: the cell's input can be viewed by double-clicking on it, but cannot be changed.
+_frozen_: the input cannot be viewed by double-clicking.
+
+To change the state of a selected cell, press the corresponding button.
+
+Each cell's state is stored in its metadata and is re-applied to the cell when the extension is loaded.
+
+## Internals
+
+The _read-only_ state is stored in the `cell.metadata.editable` attribute. Cells are editable by default.
+The _frozen_ state is stored in the `cell.metadata.run_control.frozen` attribute.
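+
+As a minimal sketch of how that metadata can be inspected offline (using the `nbformat` package, which is not part of this extension; the filename is a placeholder):
+
+    import nbformat
+
+    nb = nbformat.read('notebook.ipynb', as_version=4)
+    for cell in nb.cells:
+        frozen = cell.metadata.get('run_control', {}).get('frozen', False)
+        editable = cell.metadata.get('editable', True)
+        state = 'frozen' if frozen else ('read-only' if not editable else 'normal')
+        print(cell.cell_type, state)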
diff --git a/.local/share/jupyter/nbextensions/gist_it/gist_it.yaml b/.local/share/jupyter/nbextensions/gist_it/gist_it.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fdb866a7dedf4822c6231fc5c5183a6799918618
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/gist_it/gist_it.yaml
@@ -0,0 +1,18 @@
+Type: IPython Notebook Extension
+Compatibility: 3.x, 4.x, 5.x
+Name: Gist-it
+Main: main.js
+Description: Adds a button to publish the current notebook as a gist. See the readme for description of the authentication options and relevant parameters.
+Link: readme.md
+Icon: icon.png
+Parameters:
+- name: gist_it_personal_access_token
+ description: Github personal access token. To write gists on a user's behalf, you need a token with the gist OAuth scope.
+ input_type: text
+- name: gist_it_default_to_public
+ description: Gists default to public. If using a personal access token, gists will default to private. Set this to have them default to being public instead.
+ input_type: checkbox
+- name: github_endpoint
+ description: Github endpoint. Defaults to 'github.com'. Change this to publish to your enterprise github endpoint.
+ input_type: text
+ default: 'github.com'
diff --git a/.local/share/jupyter/nbextensions/gist_it/icon.png b/.local/share/jupyter/nbextensions/gist_it/icon.png
new file mode 100644
index 0000000000000000000000000000000000000000..09e37656024aa727c20465d2a11685a2511ccfd4
Binary files /dev/null and b/.local/share/jupyter/nbextensions/gist_it/icon.png differ
diff --git a/.local/share/jupyter/nbextensions/gist_it/main.js b/.local/share/jupyter/nbextensions/gist_it/main.js
new file mode 100644
index 0000000000000000000000000000000000000000..79c119e024e04de11ec4febcc6e24750eda0cadc
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/gist_it/main.js
@@ -0,0 +1,481 @@
+/**
+ *
+ * Avoid server-side code:
+ * https://github.com/ipython/ipython/issues/2780
+ *
+ * This essentially boils down to the following:
+ * Github authentication requires some server-side code for any 'app' which
+ * wants to authenticate over the Github API.
+ * When registering an app with Github, Github provides the app with what they
+ * call a 'client secret'.
+ * The client secret is then incorporated into the app, and the app sends it to
+ * Github as part of the authentication process, thus proving to Github's
+ * servers that the communicating app was written by someone with appropriate
+ * credentials.
+ *
+ * The issue with writing a single Github app for Gist-ing notebooks, is that
+ * it would need to include such a client secret. Since this would be part of
+ * the extension source code, anyone could use the client secret, potentially
+ * gaining the permissions that any given user has granted to the app.
+ *
+ * As a result, we only support:
+ * - anonymous (non-authenticated) API usage
+ * - client-side authentication using a personal access token
+ * (see https://github.com/settings/tokens)
+ */
+
+define([
+ 'jquery',
+ 'base/js/namespace',
+ 'base/js/dialog'
+], function (
+ $,
+ Jupyter,
+ dialog
+) {
+ "use strict";
+
+ // define default values for config parameters
+ var params = {
+ gist_it_default_to_public: false,
+ gist_it_personal_access_token: '',
+ github_endpoint: 'github.com'
+ };
+
+ var initialize = function () {
+ update_params();
+ Jupyter.toolbar.add_buttons_group([
+ Jupyter.keyboard_manager.actions.register ({
+ help : 'Create/Edit Gist of Notebook',
+ icon : 'fa-github',
+ handler: show_gist_editor_modal
+ }, 'create-gist-from-notebook', 'gist_it')
+ ]);
+ };
+
+ // update params with any specified in the server's config file
+ var update_params = function() {
+ var config = Jupyter.notebook.config;
+ for (var key in params) {
+ if (config.data.hasOwnProperty(key))
+ params[key] = config.data[key];
+ }
+ default_metadata.data.public = Boolean(config.data.gist_it_default_to_public);
+ };
+
+ var default_metadata = {
+ id: '',
+ data: {
+ description: Jupyter.notebook.notebook_path,
+ public: false
+ }
+ };
+
+ function ensure_default_metadata () {
+ Jupyter.notebook.metadata.gist = $.extend(
+ true, // deep-copy
+ default_metadata, //defaults
+ Jupyter.notebook.metadata.gist // overrides
+ );
+ }
+
+ var add_auth_token = function add_auth_token (xhr) {
+ var token = '';
+ if (params.gist_it_personal_access_token !== '') {
+ token = params.gist_it_personal_access_token;
+ }
+ if (token !== '') {
+ xhr.setRequestHeader("Authorization", "token " + token);
+ }
+ };
+
+ function build_alert(alert_class) {
+        return $('<div/>')
+            .addClass('alert alert-dismissable')
+            .addClass(alert_class)
+            .append(
+                $('<button type="button" class="close" data-dismiss="alert" aria-hidden="true"/>')
+                    .append($('<span/>').html('&times;'))
+            );
+ }
+
+ function gist_error (jqXHR, textStatus, errorThrown) {
+ console.log('github ajax error:', jqXHR, textStatus, errorThrown);
+        var alert = build_alert('alert-danger')
+            .hide()
+            .append(
+                $('<p/>').text('The ajax request to Github went wrong:')
+            )
+            .append(
+                $('<pre/>').text(jqXHR.responseJSON ? JSON.stringify(jqXHR.responseJSON, null, 2) : errorThrown)
+            );
+ $('#gist_modal').find('.modal-body').append(alert);
+ alert.slideDown('fast');
+ }
+
+ function gist_success (response, textStatus, jqXHR) {
+ // if (Jupyter.notebook.metadata.gist.id === response.id) return;
+
+ Jupyter.notebook.metadata.gist.id = response.id;
+ Jupyter.notebook.metadata._draft = $.extend(
+ true, // deep copy
+ Jupyter.notebook.metadata._draft, // defaults
+ {nbviewer_url: response.html_url} // overrides
+ );
+
+ var d = new Date();
+ var msg_head = d.toLocaleString() + ': Gist ';
+ var msg_tail = response.history.length === 1 ? ' published' : ' updated to revision ' + response.history.length;
+ var alert = build_alert('alert-success')
+ .hide()
+ .append(msg_head)
+ .append(
+                $('<a/>')
+ .attr('href', response.html_url)
+ .attr('target', '_blank')
+ .text(response.id)
+ )
+ .append(msg_tail);
+ $('#gist_modal').find('.modal-body').append(alert);
+ alert.slideDown('fast');
+ }
+
+ function get_github_endpoint() {
+ return params.github_endpoint !== '' ? params.github_endpoint : 'github.com';
+ }
+
+ function get_api_endpoint() {
+ const github_endpoint = get_github_endpoint();
+ if (github_endpoint === 'github.com') {
+ return 'https://api.'+ github_endpoint;
+ } else {
+ // Github Enterprise
+ // https://developer.github.com/enterprise/2.18/v3/enterprise-admin/#endpoint-urls
+            return 'https://' + github_endpoint + '/api/v3';
+ }
+ }
+
+ function gist_id_updated_callback(gist_editor) {
+ if (gist_editor === undefined) gist_editor = $('#gist_editor');
+
+ var id_input = gist_editor.find('#gist_id');
+ var id = id_input.val();
+
+ var help_block = gist_editor.find('#gist_id ~ .help-block');
+ var help_block_base_text = 'Set the gist id to update an existing gist, ' +
+ 'or leave blank to create a new one.';
+
+ var gist_it_button = $('#gist_modal').find('.btn-primary');
+
+ id_input.parent()
+ .removeClass('has-success has-error has-warning')
+ .find('#gist_id ~ .form-control-feedback > i.fa')
+ .removeClass('fa-pencil-square fa-exclamation-circle fa-question-circle');
+
+ if (id === '') {
+ $('#gist_id ~ .form-control-feedback > i.fa')
+ .addClass('fa-plus-circle');
+ help_block.html(
+                '<p>' + help_block_base_text + '</p>' +
+                '<p>a new gist will be created</p>'
+ );
+ gist_it_button.prop('disabled', false);
+ }
+ else {
+ $('#gist_id ~ .form-control-feedback > i.fa')
+ .addClass('fa-circle-o-notch fa-spin');
+ // List commits as a way of checking whether the gist exists.
+ // Listing commits appears to give the most concise response.
+
+ $.ajax({
+ url: get_api_endpoint() +'/gists/' + id + '/commits',
+ dataType: 'json',
+ beforeSend: add_auth_token,
+ error: function(jqXHR, textStatus, errorThrown) {
+ jqXHR.errorThrown = errorThrown;
+ },
+ complete: function(jqXHR, textStatus) {
+ var success = textStatus === 'success';
+ var error = !success && jqXHR.status === 404 && jqXHR.responseJSON !== undefined;
+ var warning = !success && !error;
+
+                    var help_block_html = '<p>' + help_block_base_text + '</p>';
+
+ gist_it_button.prop('disabled', error);
+ if (success) {
+ var single = (jqXHR.responseJSON.length === 1);
+                        help_block_html += '<p>' +
+                            '<i class="fa fa-pencil-square"></i>' +
+                            ' gist ' +
+                            '<code>' + id + '</code> will be updated' +
+                            ' (' + jqXHR.responseJSON.length +
+                            ' revision' + (single ? '' : 's') +
+                            ' exist' + (single ? 's' : '') + ' so far)' +
+                            '</p>';
+ }
+ else if (error) {
+                        help_block_html += '<p>' +
+                            '<i class="fa fa-exclamation-circle"></i>' +
+                            ' no gist exists with the specified id (given current access token)' +
+                            '</p>';
+ }
+ else {
+                        help_block_html += '<p>' +
+                            '<i class="fa fa-question-circle"></i>' +
+                            ' can\'t list commits for the specified gist id - you may have problems updating it!' +
+                            '</p>';
+                        help_block_html += '<p>The ajax request to Github went wrong:</p>' +
+                            '<pre>';
+ if (jqXHR.responseJSON) {
+ help_block_html += JSON.stringify(jqXHR.responseJSON, null, 2);
+ }
+ else {
+ help_block_html += jqXHR.errorThrown || textStatus;
+ }
+                        help_block_html += '</pre>';
+ console.log('non-404 github ajax error:', jqXHR, textStatus);
+ }
+ help_block.html(help_block_html);
+
+ id_input.parent()
+ .toggleClass('has-success', success)
+ .toggleClass('has-error', error)
+ .toggleClass('has-warning', warning)
+ .find('#gist_id ~ .form-control-feedback > i.fa')
+ .removeClass('fa-circle-o-notch fa-spin')
+ .toggleClass('fa-pencil-square', success)
+ .toggleClass('fa-exclamation-circle', error)
+ .toggleClass('fa-question-circle', warning);
+ }
+ });
+ }
+ }
+
+ function update_gist_editor (gist_editor) {
+ if (gist_editor === undefined) gist_editor = $('#gist_editor');
+
+ var id_input = gist_editor.find('#gist_id');
+
+ var have_auth = params.gist_it_personal_access_token !== '';
+ var id = '';
+ var is_public = true;
+ if (have_auth) {
+ id = Jupyter.notebook.metadata.gist.id;
+ is_public = Jupyter.notebook.metadata.gist.data.public;
+ id_input.val(id);
+ }
+ id_input.closest('.form-group').toggle(have_auth);
+
+ gist_editor.find('#gist_public')
+ .prop('checked', is_public)
+ .prop('readonly', !have_auth);
+
+ gist_editor.find('#gist_description')
+ .val(Jupyter.notebook.metadata.gist.data.description);
+
+ if (have_auth) {
+ gist_id_updated_callback(gist_editor);
+ }
+ }
+
+ function build_gist_editor () {
+ ensure_default_metadata();
+
+ var gist_editor = $('#gist_editor');
+
+ if (gist_editor.length > 0) return gist_editor;
+
+        gist_editor = $('<div/>').attr('id', 'gist_editor');
+
+ var id = params.gist_it_personal_access_token !== '' ? Jupyter.notebook.metadata.gist.id : '';
+        var controls = $('<form/>')
+ .appendTo(gist_editor)
+ .addClass('form-horizontal');
+
+        $('<div/>')
+            .addClass('has-feedback')
+            .hide()
+            .appendTo(controls)
+            .append(
+                $('<label/>')
+                    .attr('for', 'gist_id')
+                    .text('Gist id')
+            )
+            .append(
+                $('<input/>')
+                    .addClass('form-control')
+                    .attr('id', 'gist_id')
+                    .val(Jupyter.notebook.metadata.gist.id)
+            )
+            .append(
+                $('<span/>')
+                    .addClass('form-control-feedback')
+                    .append(
+                        $('<i/>')
+                            .addClass('fa fa-lg')
+                    )
+            )
+            .append(
+                $('<span/>')
+                    .addClass('help-block')
+            );
+        $('<div/>')
+            .appendTo(controls)
+            .append(
+                $('<div/>')
+                    .addClass('checkbox')
+                    .append(
+                        $('<label/>')
+                            .text('Make the gist public')
+                            .prepend(
+                                $('<input/>')
+                                    .attr('type', 'checkbox')
+                                    .attr('id', 'gist_public')
+                                    .prop('checked', Jupyter.notebook.metadata.gist.data.public)
+                                    .prop('readonly', id === '')
+                            )
+                    )
+            )
+            .append(
+                $('<label/>')
+                    .attr('for', 'gist_public')
+                    .text('public')
+            );
+        $('<div/>')
+            .appendTo(controls)
+            .append(
+                $('<label/>')
+                    .attr('for', 'gist_description')
+                    .text('description')
+            )
+            .append(
+                $('<input/>')
+                    .addClass('form-control')
+                    .attr('id', 'gist_description')
+                    .attr('type', 'textarea')
+                    .val(Jupyter.notebook.metadata.gist.data.description)
+            );
+
+ var form_groups = controls.children('div').addClass('form-group');
+ form_groups
+ .children('label')
+ .addClass('col-sm-2 control-label')
+ .css('padding-right', '1em');
+ form_groups
+ .each(function (index, elem) {
+                $('<div/>')
+ .appendTo(elem)
+ .addClass('col-sm-10')
+ .append($(elem).children(':not(label)'));
+ });
+
+ update_gist_editor(gist_editor);
+
+ // bind events for id changing
+ var id_input = gist_editor.find('#gist_id');
+ // Save current value of element
+ id_input.data('oldVal', id_input.val());
+ // Look for changes in the value
+ id_input.bind("change click keyup input paste", function(event) {
+ // If value has changed...
+ if (id_input.data('oldVal') !== id_input.val()) {
+ // Updated stored value
+ id_input.data('oldVal', id_input.val());
+ // Do action
+ gist_id_updated_callback(gist_editor);
+ }
+ });
+
+ return gist_editor;
+ }
+
+ function show_gist_editor_modal () {
+ var modal;
+ modal = dialog.modal({
+ show: false,
+ title: 'Share on Github',
+ notebook: Jupyter.notebook,
+ keyboard_manager: Jupyter.notebook.keyboard_manager,
+ body: build_gist_editor(),
+ buttons: {
+ ' Gist it!': {
+ class : 'btn-primary',
+ click: function() {
+ modal.find('.btn').prop('disabled', true);
+ var new_data = {
+ public: $('#gist_public').prop('checked'),
+ description: $('#gist_description').val()
+ };
+ $.extend(
+ true,
+ Jupyter.notebook.metadata.gist.data,
+ new_data
+ );
+ // prevent the modal from closing. See github.com/twbs/bootstrap/issues/1202
+ modal.data('bs.modal').isShown = false;
+ var spinner = modal.find('.btn-primary .fa-github').addClass('fa-spin');
+ make_gist(function (jqXHR, textStatus) {
+ modal.find('.btn').prop('disabled', false);
+ // allow the modal to close again. See github.com/twbs/bootstrap/issues/1202
+ modal.data('bs.modal').isShown = true;
+ spinner.removeClass('fa-spin');
+ });
+ }
+ },
+ done: {}
+ }
+ })
+ .attr('id', 'gist_modal')
+ .on('shown.bs.modal', function (evt) {
+ var err = modal.find('#gist_id').parent().hasClass('has-error');
+ modal.find('.btn-primary').prop('disabled', err);
+ });
+
+ modal.find('.btn-primary').prepend(
+            $('<i/>')
+ .addClass('fa fa-lg fa-github')
+ );
+
+ modal.modal('show');
+ }
+
+ var make_gist = function make_gist (complete_callback) {
+ ensure_default_metadata();
+
+ var data = $.extend(
+ true, // deep-copy
+ { files: {} }, // defaults
+ Jupyter.notebook.metadata.gist.data // overrides
+ );
+ var filename = Jupyter.notebook.notebook_name;
+ data.files[filename] = {
+ content: JSON.stringify(Jupyter.notebook.toJSON(), null, 2)
+ };
+
+ var id_input = $('#gist_id');
+ var id = params.gist_it_personal_access_token !== '' ? id_input.val() : '';
+ var method = id ? 'PATCH' : 'POST';
+
+ // Create/edit the Gist
+ $.ajax({
+ url: get_api_endpoint() +'/gists' + (id ? '/' + id : ''),
+ type: method,
+ dataType: 'json',
+ data: JSON.stringify(data),
+ beforeSend: add_auth_token,
+ success: gist_success,
+ error: gist_error,
+ complete: complete_callback
+ });
+ };
+
+ function load_jupyter_extension () {
+ return Jupyter.notebook.config.loaded.then(initialize);
+ }
+
+ return {
+ load_jupyter_extension: load_jupyter_extension,
+ load_ipython_extension: load_jupyter_extension
+ };
+});
diff --git a/.local/share/jupyter/nbextensions/go_to_current_running_cell/README.md b/.local/share/jupyter/nbextensions/go_to_current_running_cell/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..067691526a2821ad8b0ab8e2f44b11c2ef30e26b
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/go_to_current_running_cell/README.md
@@ -0,0 +1,22 @@
+Go to Running Cell
+==================
+
+This extension allows you to jump to the current running cell. You can also activate this functionality automatically, i.e., your view is always scrolling to the current cell.
+
+Button: a button with an eye icon that jumps to the first running cell.
+![button](anchor.png)
+
+Keyboard shortcuts:
+-------------------
+__*Alt-I*__ (Jump to first running cell)
+__*Meta-[*__ (Follow executing cell On)
+__*Meta-]*__ (Follow executing cell Off)
+
+Demo
+----
+### Jump to first running cell
+![button](jump_to_cell.gif)
+
+### Follow executing cell
+
+![button](auto_focus.gif)
\ No newline at end of file
diff --git a/.local/share/jupyter/nbextensions/go_to_current_running_cell/eye.png b/.local/share/jupyter/nbextensions/go_to_current_running_cell/eye.png
new file mode 100644
index 0000000000000000000000000000000000000000..2624611c8d9b99a8a0e7fe3d6b3717ae5f957d6a
Binary files /dev/null and b/.local/share/jupyter/nbextensions/go_to_current_running_cell/eye.png differ
diff --git a/.local/share/jupyter/nbextensions/go_to_current_running_cell/go_to_current_running_cell.yaml b/.local/share/jupyter/nbextensions/go_to_current_running_cell/go_to_current_running_cell.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4bfcb6ebe59be3a5ee894f846bb327dc5855fdb2
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/go_to_current_running_cell/go_to_current_running_cell.yaml
@@ -0,0 +1,26 @@
+Type: Jupyter Notebook Extension
+Name: Go to Current Running Cells
+Description: Go to Running cell and always scroll into current running cell view
+Link: README.md
+Main: main.js
+Compatibility: 4.x, 5.x
+
+Parameters:
+- name: is_follow_cell
+ description: Automatically follow the executing cell; default behavior is false.
+ input_type: checkbox
+- name: go_to_running_cell_shortcut
+ description: Go to first running cell
+ input_type: input
+ default: Alt-I
+- name: follow_cell_on_shortcut
+ description: Enable following running cell
+ input_type: input
+ default: Alt-;
+- name: follow_cell_off_shortcut
+ description: Disable following running cell
+ input_type: input
+ default: Alt-'
+- name: button_icon
+ description: Icon for the go-to-first-running-cell button
+ default: fa-anchor
\ No newline at end of file
diff --git a/.local/share/jupyter/nbextensions/help_panel/help_panel.css b/.local/share/jupyter/nbextensions/help_panel/help_panel.css
new file mode 100644
index 0000000000000000000000000000000000000000..dee834215fe68fb66b5b103f389e4a540737f1fe
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/help_panel/help_panel.css
@@ -0,0 +1,56 @@
+#side_panel {
+ position: relative;
+ top: 0;
+ right: 0;
+ height: 100%;
+ font-size: 0.9em;
+ overflow: auto;
+ background-color: #FFFFFF;
+}
+
+@media print {
+ /* print-previews can't handle 100%-height element as main part of page */
+ #side_panel {
+ height: auto;
+ }
+
+ /*
+ see
+ stackoverflow.com/questions/19948474
+ stackoverflow.com/questions/20243767
+ */
+ .col-md-6 {
+ float: left;
+ width: 50%;
+ }
+
+ .quickhelp {
+ page-break-inside: avoid;
+ }
+}
+
+.side_panel_splitbar {
+ position: absolute;
+ left: 0;
+ top: 0;
+ cursor: col-resize;
+ height: 100%;
+ width: 8px;
+ background: url(./img/handle-v.png) 2px 50% no-repeat;
+ background-color: #F6F6F6;
+}
+
+.side_panel_inner {
+ overflow: auto;
+ height: inherit;
+ margin-left: 8px;
+}
+
+.side_panel_inner > div:not(.alert) {
+ padding: 0.5em;
+}
+
+.help_panel_hide .modal-backdrop,
+.help_panel_hide .modal {
+ display: none !important;
+}
diff --git a/.local/share/jupyter/nbextensions/help_panel/help_panel.js b/.local/share/jupyter/nbextensions/help_panel/help_panel.js
new file mode 100644
index 0000000000000000000000000000000000000000..13e7a326a7280b97a75c6d6b0982748eb5a5a6fa
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/help_panel/help_panel.js
@@ -0,0 +1,248 @@
+// Add help panel at right side of notebook window
+
+define([
+ 'require',
+ 'jqueryui',
+ 'base/js/namespace',
+ 'base/js/events',
+], function (
+ requirejs,
+ $,
+ IPython,
+ events
+) {
+ 'use strict';
+
+ /**
+ * try to get bootstrap tooltip plugin.
+ * The require call may fail, since the plugin doesn't seem to be included
+ * in all Jupyter versions. In this case, we fallback to using jqueryui tooltips.
+ */
+ var have_bs_tooltips = false;
+ requirejs(
+ ['components/bootstrap/js/tooltip'],
+ // we don't actually need to do anything with the return
+ // just ensure that the plugin gets loaded.
+ function () { have_bs_tooltips = true; },
+ // The errback, error callback
+ // The error has a list of modules that failed
+ function (err) {
+ var failedId = err.requireModules && err.requireModules[0];
+ if (failedId === 'components/bootstrap/js/tooltip') {
+ // could do something here, like load a cdn version.
+ // For now, just ignore it.
+ have_bs_tooltips = false;
+ }
+ }
+ );
+
+ // define default values for config parameters
+ var params = {
+ help_panel_add_toolbar_button: false
+ };
+
+ // update params with any specified in the server's config file
+ function update_params () {
+ var config = IPython.notebook.config;
+ for (var key in params) {
+ if (config.data.hasOwnProperty(key))
+ params[key] = config.data[key];
+ }
+ }
+
+ var initialize = function () {
+ update_params();
+ if (params.help_panel_add_toolbar_button) {
+ $(IPython.toolbar.add_buttons_group([
+ IPython.keyboard_manager.actions.register({
+ help : 'Show help panel',
+ icon : 'fa-book',
+ handler: function() {
+ var visible = toggleHelpPanel();
+ var btn = $(this);
+ setTimeout(function() { btn.blur(); }, 500);
+ }
+ }, 'show-help-panel', 'help_panel'),
+ ])).find('.btn').attr({
+ id: 'btn_help_panel',
+ 'data-toggle': 'button',
+ 'aria-pressed': 'false'
+ });
+ }
+ };
+
+ var side_panel_min_rel_width = 10;
+ var side_panel_max_rel_width = 90;
+ var side_panel_start_width = 45;
+
+ var build_side_panel = function (main_panel, side_panel, min_rel_width, max_rel_width) {
+ if (min_rel_width === undefined) min_rel_width = 0;
+ if (max_rel_width === undefined) max_rel_width = 100;
+
+ side_panel.css('display','none');
+ side_panel.insertAfter(main_panel);
+
+        var side_panel_splitbar = $('<div class="side_panel_splitbar"/>');
+        var side_panel_inner = $('<div class="side_panel_inner"/>');
+        var side_panel_expand_contract = $('<i class="btn fa fa-expand hidden-print">');
+ side_panel.append(side_panel_splitbar);
+ side_panel.append(side_panel_inner);
+ side_panel_inner.append(side_panel_expand_contract);
+
+ side_panel_expand_contract.attr({
+ title: 'expand/contract panel',
+ 'data-toggle': 'tooltip'
+ }).tooltip({
+ placement: 'right'
+ }).click(function () {
+ var open = $(this).hasClass('fa-expand');
+ var site = $('#site');
+ slide_side_panel(main_panel, side_panel,
+ open ? 100 : side_panel.data('last_width') || side_panel_start_width);
+ $(this).toggleClass('fa-expand', !open).toggleClass('fa-compress', open);
+
+ var tooltip_text = (open ? 'shrink to not' : 'expand to') + ' fill the window';
+ if (open) {
+ side_panel.insertAfter(site);
+ site.slideUp();
+ $('#header').slideUp();
+ side_panel_inner.css({'margin-left': 0});
+ side_panel_splitbar.hide();
+ }
+ else {
+ side_panel.insertAfter(main_panel);
+ $('#header').slideDown();
+ site.slideDown({
+ complete: function() { events.trigger('resize-header.Page'); }
+ });
+ side_panel_inner.css({'margin-left': ''});
+ side_panel_splitbar.show();
+ }
+
+ if (have_bs_tooltips) {
+ side_panel_expand_contract.attr('title', tooltip_text);
+ side_panel_expand_contract.tooltip('hide').tooltip('fixTitle');
+ }
+ else {
+ side_panel_expand_contract.tooltip('option', 'content', tooltip_text);
+ }
+ });
+
+ // bind events for resizing side panel
+ side_panel_splitbar.mousedown(function (md_evt) {
+ md_evt.preventDefault();
+ $(document).mousemove(function (mm_evt) {
+ mm_evt.preventDefault();
+ var pix_w = side_panel.offset().left + side_panel.outerWidth() - mm_evt.pageX;
+ var rel_w = 100 * (pix_w) / side_panel.parent().width();
+ rel_w = rel_w > min_rel_width ? rel_w : min_rel_width;
+ rel_w = rel_w < max_rel_width ? rel_w : max_rel_width;
+ main_panel.css('width', (100 - rel_w) + '%');
+ side_panel.css('width', rel_w + '%').data('last_width', rel_w);
+ });
+ return false;
+ });
+ $(document).mouseup(function (mu_evt) {
+ $(document).unbind('mousemove');
+ });
+
+ return side_panel;
+ };
+
+ var slide_side_panel = function (main_panel, side_panel, desired_width) {
+
+ var anim_opts = {
+ step : function (now, tween) {
+ main_panel.css('width', 100 - now + '%');
+ }
+ };
+
+ if (desired_width === undefined) {
+ if (side_panel.is(':hidden')) {
+ desired_width = (side_panel.data('last_width') || side_panel_start_width);
+ }
+ else {
+ desired_width = 0;
+ }
+ }
+
+ var visible = desired_width > 0;
+ if (visible) {
+ main_panel.css({float: 'left', 'overflow-x': 'auto'});
+ side_panel.show();
+ }
+ else {
+ anim_opts['complete'] = function () {
+ side_panel.hide();
+ main_panel.css({float : '', 'overflow-x': '', width: ''});
+ };
+ }
+
+ side_panel.animate({width: desired_width + '%'}, anim_opts);
+ return visible;
+ };
+
+ var populate_side_panel = function(side_panel) {
+ var side_panel_inner = side_panel.find('.side_panel_inner');
+ var qh = IPython.quick_help;
+ var strip_modal = function(into) {
+ // strip qh modal, insert content into element 'into'
+ $('.quickhelp').closest('.modal-body').children().children().appendTo(into);
+ };
+
+ if ($('.quickhelp').length > 0) {
+ strip_modal(side_panel_inner);
+ }
+ else {
+ // ensure quickhelp shortcuts modal won't show
+ $('body').addClass('help_panel_hide');
+ // get quickhelp to show shortcuts
+ qh.show_keyboard_shortcuts();
+ // attach handler for qh showing shortcuts
+ var qh_dia = $(qh.shortcut_dialog);
+ qh_dia.on('shown.bs.modal', function(evt) {
+ strip_modal(side_panel_inner);
+ // delicately pretend that it was never shown, unbind handlers
+ qh_dia.on('hidden.bs.modal', function () {
+ $('body').removeClass('help_panel_hide');
+ qh_dia.off('hidden.bs.modal');
+ }).off('shown.bs.modal').modal("hide");
+ });
+ }
+ // make sure content we stripped will be rebuilt
+ qh.force_rebuild = true;
+ };
+
+ var toggleHelpPanel = function () {
+ var main_panel = $('#notebook_panel');
+ var side_panel = $('#side_panel');
+
+ if (side_panel.length < 1) {
+            side_panel = $('<div id="side_panel"/>');
+ build_side_panel(main_panel, side_panel,
+ side_panel_min_rel_width, side_panel_max_rel_width);
+ populate_side_panel(side_panel);
+ }
+
+ var visible = slide_side_panel(main_panel, side_panel);
+ if (params.help_panel_add_toolbar_button) {
+ $('#btn_help_panel').toggleClass('active', visible);
+ }
+ return visible;
+ };
+
+ var load_ipython_extension = function () {
+ $('head').append(
+            $('<link/>', {
+ rel: 'stylesheet',
+ type:'text/css',
+ href: requirejs.toUrl('./help_panel.css')
+ })
+ );
+ return IPython.notebook.config.loaded.then(initialize);
+ };
+
+ return {
+ load_ipython_extension : load_ipython_extension
+ };
+});
diff --git a/.local/share/jupyter/nbextensions/help_panel/help_panel_ext.png b/.local/share/jupyter/nbextensions/help_panel/help_panel_ext.png
new file mode 100644
index 0000000000000000000000000000000000000000..3069ea03f8b8318738bbfe79f8c3dc6aaeca2f66
Binary files /dev/null and b/.local/share/jupyter/nbextensions/help_panel/help_panel_ext.png differ
diff --git a/.local/share/jupyter/nbextensions/help_panel/img/handle-v.png b/.local/share/jupyter/nbextensions/help_panel/img/handle-v.png
new file mode 100644
index 0000000000000000000000000000000000000000..0e1a9598eee2f1c793ce7d78f69b4532e74cbdc9
Binary files /dev/null and b/.local/share/jupyter/nbextensions/help_panel/img/handle-v.png differ
diff --git a/.local/share/jupyter/nbextensions/hide_header/README.md b/.local/share/jupyter/nbextensions/hide_header/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..cf533098850eeca3382b29059ebd88e723c5a936
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/hide_header/README.md
@@ -0,0 +1,4 @@
+Hide Header
+===========
+
+Add keyboard shortcut to toggle the whole header, menubar and toolbar visibility.
diff --git a/.local/share/jupyter/nbextensions/hide_header/hide_header.yaml b/.local/share/jupyter/nbextensions/hide_header/hide_header.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b486a2a9d2c4f87abc681dcccfc82ac01c85863c
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/hide_header/hide_header.yaml
@@ -0,0 +1,12 @@
+Type: IPython Notebook Extension
+Name: Hide Header
+Link: README.md
+Description: Toggle visibility of all of header, menubar, toolbar using a hotkey
+Main: main.js
+Compatibility: 4.x, 5.x
+Parameters:
+- name: header_toggle
+ description: keybinding for toggling header visibility
+ input_type: hotkey
+ default: ctrl-H
+
diff --git a/.local/share/jupyter/nbextensions/hide_input/icon.png b/.local/share/jupyter/nbextensions/hide_input/icon.png
new file mode 100644
index 0000000000000000000000000000000000000000..a9bcbeacd4686b57ae2819ee77a15660173cb6ce
Binary files /dev/null and b/.local/share/jupyter/nbextensions/hide_input/icon.png differ
diff --git a/.local/share/jupyter/nbextensions/hide_input/readme.md b/.local/share/jupyter/nbextensions/hide_input/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..83a224fd4299750ff70343512ff5992e7f0fba6e
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/hide_input/readme.md
@@ -0,0 +1,51 @@
+Hide Input
+==========
+
+This extension allows hiding of an individual codecell in a notebook. This can
+be achieved by clicking on the toolbar button:
+
+![](icon.png)
+
+
+Internals
+---------
+
+The codecell hiding state is stored in the metadata `cell.metadata.hide_input`.
+If it is set to `true`, the codecell will be hidden on reload.
+
+
+Exporting with nbconvert
+------------------------
+
+See also the general docs for exporting using nbconvert at
+[jupyter-contrib-nbextensions.readthedocs.io](https://jupyter-contrib-nbextensions.readthedocs.io/en/latest/).
+
+To export a notebook with hidden cell inputs using nbconvert, you need to use a
+custom template.
+The required template is supplied as part of
+`jupyter_contrib_nbextensions.nbconvert_support`, or you can roll your own
+using the provided ones as examples. Again, see the docs linked above for more
+information.
+
+The `nbextensions.tpl` template is provided in the
+`jupyter_contrib_nbextensions.nbconvert_support` templates directory (see the
+docs mentioned above for how to find it)
+
+To use, add the template to your `nbconvert` call:
+
+ jupyter nbconvert --template=nbextensions --to=html my_notebook.ipynb
+
+The nbextensions template will respect the `cell.metadata.hide_input` flag, and
+filter the cell's output prompt (the bit that looks like `Out[27]:`).
+The filter is only used for html output, not for PDF or LaTeX output.
+
+If you want to _keep_ the cell output prompt, you will have to remove the lines
+
+ {% block output_group -%}
+ {%- if cell.metadata.hide_output or nb.metadata.hide_input -%}
+ {%- else -%}
+ {{ super() }}
+ {%- endif -%}
+ {% endblock output_group %}
+
+in the `nbextensions.tpl` file.
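+
+For programmatic use, a rough sketch with the nbconvert Python API (a sketch only; it assumes the template file can be found on nbconvert's template path, and the filenames are placeholders):
+
+    import nbformat
+    from nbconvert import HTMLExporter
+
+    nb = nbformat.read('my_notebook.ipynb', as_version=4)
+    # template_file points at the nbextensions template described above
+    exporter = HTMLExporter(template_file='nbextensions.tpl')
+    body, _resources = exporter.from_notebook_node(nb)
+    with open('my_notebook.html', 'w') as f:
+        f.write(body)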
diff --git a/.local/share/jupyter/nbextensions/hide_input_all/hide_input_all.yaml b/.local/share/jupyter/nbextensions/hide_input_all/hide_input_all.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ec413205b9b61aeb2f7a86c53e87a2dbf114133f
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/hide_input_all/hide_input_all.yaml
@@ -0,0 +1,7 @@
+Type: IPython Notebook Extension
+Compatibility: 3.x, 4.x, 5.x
+Main: main.js
+Name: Hide input all
+Description: "toggle display of all code cells' inputs"
+Icon: icon.png
+Link: readme.md
diff --git a/.local/share/jupyter/nbextensions/hide_input_all/hide_input_all_show.png b/.local/share/jupyter/nbextensions/hide_input_all/hide_input_all_show.png
new file mode 100644
index 0000000000000000000000000000000000000000..8d7e03091db8cf96293b123c49955fd5bcb91e35
Binary files /dev/null and b/.local/share/jupyter/nbextensions/hide_input_all/hide_input_all_show.png differ
diff --git a/.local/share/jupyter/nbextensions/hide_input_all/readme.md b/.local/share/jupyter/nbextensions/hide_input_all/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..bd1704b934188e0e95d1af7b91ef28ece32945d6
--- /dev/null
+++ b/.local/share/jupyter/nbextensions/hide_input_all/readme.md
@@ -0,0 +1,44 @@
+Hide all Input
+==============
+This extension allows hiding all codecells of a notebook. This can be achieved by clicking the toolbar button:
+
+![](icon.png)
+
+Typically, all codecells are shown with their corresponding output:
+
+![](hide_input_all_show.png)
+
+Clicking on the "Toggle codecell display" toolbar button hides all codecells:
+
+![](hide_input_all_hide.png)
+
+
+Internals
+---------
+
+The codecell hiding state is stored in the metadata `IPython.notebook.metadata.hide_input`.
+If it is set to `true`, all codecells will be hidden on reload.
+
+The `nbextensions.tpl` template is provided in the
+`jupyter_contrib_nbextensions.nbconvert_support` templates directory (see the
+general docs at jupyter-contrib-nbextensions.readthedocs.io for how to find it)
+
+To use, add the template to your `nbconvert` call:
+
+ jupyter nbconvert --template=nbextensions --to=html my_notebook.ipynb
+
+The nbextensions template will respect the `nb.metadata.hide_input` flag, and
+filter the cell's output prompt (the bit that looks like `Out[27]:`).
+The filter is only used for html output, not for PDF or LaTeX output.
+
+If you want to _keep_ the cell output prompt, you will have to remove the lines
+
+ {% block output_group -%}
+ {%- if cell.metadata.hide_output or nb.metadata.hide_input -%}
+ {%- else -%}
+ {{ super() }}
+ {%- endif -%}
+ {% endblock output_group %}
+
+in the `nbextensions.tpl` file.
+
\ No newline at end of file
diff --git a/.netrc b/.netrc
new file mode 100644
index 0000000000000000000000000000000000000000..fe861140efc2508d2b9a8931e7299c81bbdb72ac
--- /dev/null
+++ b/.netrc
@@ -0,0 +1,3 @@
+machine api.wandb.ai
+ login user
+ password 6ecd16bdbbf69126cf2fc67463add9b850b266be
diff --git a/.profile b/.profile
new file mode 100644
index 0000000000000000000000000000000000000000..c4c7402daf7ae66c19afdd13030901209f3d77fa
--- /dev/null
+++ b/.profile
@@ -0,0 +1,9 @@
+# ~/.profile: executed by Bourne-compatible login shells.
+
+if [ "$BASH" ]; then
+ if [ -f ~/.bashrc ]; then
+ . ~/.bashrc
+ fi
+fi
+
+mesg n 2> /dev/null || true
diff --git a/.ssh/authorized_keys b/.ssh/authorized_keys
new file mode 100644
index 0000000000000000000000000000000000000000..2c62162761b0e7a99253cb69d264f9529fab4808
--- /dev/null
+++ b/.ssh/authorized_keys
@@ -0,0 +1 @@
+ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDoIyvGR41mgVaXe6jD5Nse22nu8s5lp2/+BQjs1yh31Weu2FLduzVKqGcB17yzYVqOYTaDwQLY0PlHzfx8DFP9GJHhIYa9XRqCQrB8GdyL24c6PsM7quQI2+uG5261kZFadfUz0W9tXCabbeEFmqnymUpnyyeklK5+mYO+mSoctmYNsKUFPdfxW6P4/H8BrxHHj53hAQbi1Lcz9+mrsEaAWdjnVVEjW+qH+kOGzzv4v1VA780cB1dS2UjeDgH5JRb3yFS4Y1MKdk2WVzjjvD92LplJu2E1/GFk3PHicV8+wwlB5QBVCRKvAm85QPutFTpurj/cTou2ISfNeLREiUwF6gfWjg7iTcuR3gWR4EEocS2YmJ+b07MPMvizI/ISb0gp3YwoDbTkIbnuXsdzxznzb49SMTr3Jg91mL4JUEpAOZjMaN/90Ik5/OBtYrhYxUIj9pR3RXzXJE9c9TEbkjMM3e/rrXn1yA7srzAGeJp/vKM+8i0pGo+Nf0gMD8zA82/ndlCRPS0MG9S2IeiW/EulE0VsIs5xvsaYiOSICauGiLECCHIZXDQcAH7wTuJFmJeqxGzCAopSy4/E1BLMi+D3/+AZLx8WGqF5nXT6BggRdhhzwLO5LNrwC+GUe8+2CZrKrO15j550cHa+ULiwqCf1w7/kl/kK0W5u5noFBxP9HQ== prasadchandalada@gmail..com
\ No newline at end of file
diff --git a/.ssh/known_hosts b/.ssh/known_hosts
new file mode 100644
index 0000000000000000000000000000000000000000..b92939762b3b9ecd6767d95a68d11d4eb45d4d45
--- /dev/null
+++ b/.ssh/known_hosts
@@ -0,0 +1 @@
+|1|bt6nSRocaO9qtCHNahkq62VxEz0=|tK/HjVuKdmSO2NOe+4TtaTv0rVo= ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIEupViOcbwUgb1IIbIupx/7i7iCnQpo+L2txA5fn/Vxh
diff --git a/.vast_api_key b/.vast_api_key
new file mode 100644
index 0000000000000000000000000000000000000000..e7511c27c79d4f04d542e6e79f97c1b7f170412d
--- /dev/null
+++ b/.vast_api_key
@@ -0,0 +1 @@
+1efc8d05bb9bbe3541b3e9fdd4650c0c906e9e7ee2f4196f040981de3c073140
diff --git a/.vast_containerlabel b/.vast_containerlabel
new file mode 100644
index 0000000000000000000000000000000000000000..37e0fbfda84ba161230d2ae4d493c189bef2aefe
--- /dev/null
+++ b/.vast_containerlabel
@@ -0,0 +1 @@
+C.12733928
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/configurator.py b/configurator.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1b9a4999e1815e202c0ae3c2f284f70cbe6eb3a
--- /dev/null
+++ b/configurator.py
@@ -0,0 +1,47 @@
+"""
+Poor Man's Configurator. Probably a terrible idea. Example usage:
+$ python train.py config/override_file.py --batch_size=32
+this will first run config/override_file.py, then override batch_size to 32
+
+The code in this file will be run as follows from e.g. train.py:
+>>> exec(open('configurator.py').read())
+
+So it's not a Python module, it's just shuttling this code away from train.py
+The code in this script then overrides the globals()
+
+I know people are not going to love this, I just really dislike configuration
+complexity and having to prepend config. to every single variable. If someone
+comes up with a better simple Python solution I am all ears.
+"""
+
+import sys
+from ast import literal_eval
+
+for arg in sys.argv[1:]:
+ if '=' not in arg:
+ # assume it's the name of a config file
+ assert not arg.startswith('--')
+ config_file = arg
+ print(f"Overriding config with {config_file}:")
+ with open(config_file) as f:
+ print(f.read())
+ exec(open(config_file).read())
+ else:
+ # assume it's a --key=value argument
+ assert arg.startswith('--')
+        key, val = arg.split('=', 1)  # split on the first '=' so values may contain '='
+ key = key[2:]
+ if key in globals():
+ try:
+                # attempt to eval it (e.g. if it's a bool, number, etc.)
+ attempt = literal_eval(val)
+ except (SyntaxError, ValueError):
+ # if that goes wrong, just use the string
+ attempt = val
+ # ensure the types match ok
+ assert type(attempt) == type(globals()[key])
+ # cross fingers
+ print(f"Overriding: {key} = {attempt}")
+ globals()[key] = attempt
+ else:
+ raise ValueError(f"Unknown config key: {key}")
\ No newline at end of file
diff --git a/hasbooted b/hasbooted
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/model.py b/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..1380c4f4cca1a18ca21d9d12509cfed04c63cc4f
--- /dev/null
+++ b/model.py
@@ -0,0 +1,390 @@
+# model.py: a GPT-2 style model with configurable positional encodings
+
+import math
+import inspect
+from dataclasses import dataclass
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Import necessary modules for different positional encodings
+import numpy as np
+import scipy.special
+import scipy.signal
+
+from packaging import version
+
+# Use the fused scaled_dot_product_attention (which can dispatch to
+# FlashAttention kernels) when running on PyTorch >= 2.0
+use_flash_attn = hasattr(F, 'scaled_dot_product_attention') and version.parse(torch.__version__) >= version.parse('2.0.0')
+if use_flash_attn:
+    print("Fused scaled_dot_product_attention is available and will be used where possible.")
+else:
+    print("Fused attention is not available. Using standard attention.")
+
+class LayerNorm(nn.Module):
+ """LayerNorm with optional bias."""
+ def __init__(self, ndim, bias):
+ super().__init__()
+ self.weight = nn.Parameter(torch.ones(ndim))
+ self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None
+ def forward(self, input):
+ return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
+
+def get_positional_encoding(position, d_model, method, max_len=5000):
+ """
+ Generate positional encodings based on the specified method.
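+    `position` should be torch.arange(max_len); returns a (max_len, d_model)
+    tensor, or None for 'default'/'learned' (those are handled by nn.Embedding
+    in the model). A quick shape check, as a sketch:
+        pe = get_positional_encoding(torch.arange(512), 64, 'sinusoidal', max_len=512)
+        assert pe.shape == (512, 64)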
+ """
+ if method == 'default':
+ return None # Handled by nn.Embedding in the model
+ elif method == 'learned':
+ return None # Handled by nn.Embedding in the model
+ elif method == 'sinusoidal':
+ pe = torch.zeros(max_len, d_model)
+ position_enc = position.unsqueeze(1)
+ div_term = torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))
+ pe[:, 0::2] = torch.sin(position_enc * div_term)
+ pe[:, 1::2] = torch.cos(position_enc * div_term)
+ return pe
+ elif method == 'exponential':
+ pe = torch.exp(-position.float() / max_len).unsqueeze(1).repeat(1, d_model)
+ return pe
+ elif method == 'polynomial_legendre':
+ pe = torch.zeros(max_len, d_model)
+ x = (position / max_len * 2) - 1 # Scale positions to [-1,1]
+ for i in range(d_model):
+ pe[:, i] = scipy.special.eval_legendre(i, x)
+ return pe
+ elif method == 'polynomial_chebyshev':
+ pe = torch.zeros(max_len, d_model)
+ x = (position / max_len * 2) - 1 # Scale positions to [-1,1]
+ for i in range(d_model):
+ pe[:, i] = scipy.special.eval_chebyt(i, x)
+ return pe
+ elif method == 'gaussian':
+ pe = torch.zeros(max_len, d_model)
+ positions = position.float()
+ means = torch.linspace(0, max_len, d_model)
+ std = max_len / d_model
+ for i in range(d_model):
+ pe[:, i] = torch.exp(- ((positions - means[i]) **2) / (2 * std **2))
+ return pe
+    elif method == 'random_fourier':
+        B = torch.randn(d_model, 1)
+        x = position.float().unsqueeze(1) / max_len  # (max_len, 1)
+        x = x @ B.T * 2 * math.pi                    # (max_len, d_model)
+        pe = torch.cat([torch.sin(x), torch.cos(x)], dim=1)
+        return pe[:, :d_model]
+    elif method == 'wavelet':
+        pe = torch.zeros(max_len, d_model)
+        scales = torch.arange(1, d_model + 1)
+        idx = position.numpy()
+        for i in range(d_model):
+            # scipy.signal.ricker is deprecated in recent SciPy releases
+            wavelet = scipy.signal.ricker(points=max_len, a=float(scales[i]))
+            pe[:, i] = torch.from_numpy(wavelet[idx])
+        return pe
+ elif method == 'bessel':
+ pe = torch.zeros(max_len, d_model)
+ x = position.float()
+ for i in range(d_model):
+ pe[:, i] = scipy.special.jv(i, x)
+ return pe
+ elif method == 'alternative':
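+        # note: tan is unbounded, so these values can be very large near odd multiples of pi/2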
+ pe = torch.zeros(max_len, d_model)
+ position_enc = position.float()
+ div_term = torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))
+ pe[:, 0::2] = torch.tan(position_enc * div_term)
+ pe[:, 1::2] = torch.sin(position_enc * div_term + math.pi / 4)
+ return pe
+ elif method == 'none':
+ return torch.zeros(max_len, d_model)
+ else:
+ raise ValueError(f"Unknown positional encoding method: {method}")
+
+class CausalSelfAttention(nn.Module):
+ def __init__(self, config):
+ super().__init__()
+ self.config = config
+ assert config.n_embd % config.n_head == 0
+ self.n_head = config.n_head
+ self.n_embd = config.n_embd
+ self.dropout = config.dropout
+ self.head_dim = self.n_embd // self.n_head
+
+ self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd, bias=config.bias)
+ self.c_proj = nn.Linear(config.n_embd, config.n_embd, bias=config.bias)
+ self.resid_dropout = nn.Dropout(config.dropout)
+
+ # Implement attention-level positional encodings
+ if config.attention_type == 'rope':
+ self.rotary_dim = self.n_embd // self.n_head
+ if self.rotary_dim % 2 != 0:
+ self.rotary_dim -= self.rotary_dim % 2 # Ensure even dimension
+ inv_freq = 1.0 / (10000 ** (torch.arange(0, self.rotary_dim, 2).float() / self.rotary_dim))
+ self.register_buffer('inv_freq', inv_freq)
+ elif config.attention_type == 'alibi':
+ slopes = self.get_alibi_slopes(self.n_head)
+ self.register_buffer('alibi_slopes', slopes)
+ elif config.attention_type == 'relative':
+ num_rel_dis = 2 * config.block_size - 1
+ self.relative_positions = nn.Embedding(num_rel_dis, self.n_head)
+ # else: default attention (nothing extra to define)
+
+ def get_alibi_slopes(self, n_heads):
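+        """Geometric per-head slopes as in ALiBi (Press et al., 2022);
+        e.g. n_heads=8 yields 1/2, 1/4, ..., 1/256."""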
+ def get_slopes(n):
+ import math
+ def get_slopes_power_of_2(n):
+ start = 2 ** (-2 ** -(math.log2(n) - 3))
+ ratio = start
+ return [start * (ratio ** i) for i in range(n)]
+ if math.log2(n).is_integer():
+ return torch.Tensor(get_slopes_power_of_2(n))
+ else:
+ closest_power_of_2 = 2 ** math.floor(math.log2(n))
+ slopes = get_slopes_power_of_2(closest_power_of_2)
+ extra_slopes = get_slopes(2 * closest_power_of_2)[0::2][:n - closest_power_of_2]
+ return torch.Tensor(slopes + extra_slopes)
+ slopes = get_slopes(n_heads)
+ return slopes.view(n_heads, 1, 1)
+
+    def apply_rope(self, x):
+        # x: (B, n_head, T, head_dim); rotate the first rotary_dim channels
+        # using the standard half-split RoPE formulation
+        seq_len = x.size(-2)
+        t = torch.arange(seq_len, device=x.device, dtype=self.inv_freq.dtype)
+        freqs = torch.einsum('i , j -> i j', t, self.inv_freq)     # (T, rotary_dim/2)
+        emb = torch.cat((freqs, freqs), dim=-1)[None, None, :, :]  # (1, 1, T, rotary_dim)
+        cos, sin = emb.cos(), emb.sin()
+        x1 = x[..., :self.rotary_dim]
+        x2 = x[..., self.rotary_dim:]
+        half = self.rotary_dim // 2
+        rotated = torch.cat((-x1[..., half:], x1[..., :half]), dim=-1)
+        x1_rot = x1 * cos + rotated * sin
+        return torch.cat((x1_rot, x2), dim=-1)
+
+ def forward(self, x, layer_past=None):
+ B, T, C = x.size()
+ qkv = self.c_attn(x).view(B, T, 3, self.n_head, self.head_dim)
+ qkv = qkv.permute(2, 0, 3, 1, 4) # (3, B, n_head, T, head_dim)
+ q, k, v = qkv[0], qkv[1], qkv[2] # Each is (B, n_head, T, head_dim)
+
+ if self.config.attention_type == 'rope':
+ q = self.apply_rope(q)
+ k = self.apply_rope(k)
+
+ # Decide whether to use Flash Attention based on training/evaluation mode and tracking flags
+ if use_flash_attn and self.config.attention_type in ['default', 'rope'] and not (self.config.track_attention_patterns and not self.training):
+ # Use PyTorch's scaled_dot_product_attention which leverages Flash Attention 2
+ y = F.scaled_dot_product_attention(
+ q, k, v, attn_mask=None,
+ dropout_p=self.dropout if self.training else 0.0,
+ is_causal=True
+ )
+ else:
+ # Standard attention mechanism
+ attn_scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.head_dim)
+
+ if self.config.attention_type == 'alibi':
+ position_ids = torch.arange(T, device=x.device).unsqueeze(0).unsqueeze(0)
+ alibi = self.alibi_slopes.to(x.device) * position_ids # (n_head, 1, T)
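+                # note: adding m*j here is equivalent to the canonical ALiBi
+                # bias -m*(i-j): they differ by m*i, a per-query-row constant
+                # that the softmax cancels under the causal mask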
+ attn_scores = attn_scores + alibi
+
+            elif self.config.attention_type == 'relative':
+                positions = torch.arange(T, device=x.device)
+                # (T, T) matrix of key-minus-query offsets, shifted into [0, 2*block_size-2]
+                rel_idx = positions[None, :] - positions[:, None] + self.config.block_size - 1
+                rel_bias = self.relative_positions(rel_idx)  # (T, T, n_head)
+                attn_scores = attn_scores + rel_bias.permute(2, 0, 1).unsqueeze(0)
+
+ # Apply causal mask
+ causal_mask = torch.tril(torch.ones(T, T, device=x.device)).view(1, 1, T, T)
+ attn_scores = attn_scores.masked_fill(causal_mask == 0, float('-inf'))
+
+ attn_weights = F.softmax(attn_scores, dim=-1)
+ attn_weights = F.dropout(attn_weights, p=self.dropout, training=self.training)
+
+ # Collect attention patterns if required
+ if self.config.track_attention_patterns and not self.training:
+ self.attn_weights = attn_weights.detach().cpu()
+ y = torch.matmul(attn_weights, v)
+
+ y = y.transpose(1, 2).contiguous().view(B, T, C)
+ y = self.resid_dropout(self.c_proj(y))
+ return y
+
+class MLP(nn.Module):
+ def __init__(self, config):
+ super().__init__()
+ self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd, bias=config.bias)
+ self.gelu = nn.GELU()
+ self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd, bias=config.bias)
+ self.dropout = nn.Dropout(config.dropout)
+ def forward(self, x):
+ x = self.c_fc(x)
+ x = self.gelu(x)
+ x = self.c_proj(x)
+ x = self.dropout(x)
+ return x
+
+class Block(nn.Module):
+ def __init__(self, config):
+ super().__init__()
+ self.ln_1 = LayerNorm(config.n_embd, bias=config.bias)
+ self.attn = CausalSelfAttention(config)
+ self.ln_2 = LayerNorm(config.n_embd, bias=config.bias)
+ self.mlp = MLP(config)
+ def forward(self, x):
+ x = x + self.attn(self.ln_1(x))
+ x = x + self.mlp(self.ln_2(x))
+ return x
+
+@dataclass
+class GPTConfig:
+ block_size: int = 1024
+ vocab_size: int = 50304
+ n_layer: int = 12
+ n_head: int = 12
+ n_embd: int = 768
+ dropout: float = 0.0
+ bias: bool = True
+ embedding_type: str = 'default' # Default uses learned positional embeddings
+ attention_type: str = 'default' # Default attention without any modifications
+ track_activations: bool = False
+ track_attention_patterns: bool = False
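+# Example (hypothetical values): a small config for quick experiments:
+#   GPTConfig(block_size=256, vocab_size=50257, n_layer=4, n_head=4,
+#             n_embd=256, embedding_type='sinusoidal', attention_type='rope')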
+
+class GPT(nn.Module):
+ def __init__(self, config):
+ super().__init__()
+ assert config.vocab_size is not None
+ assert config.block_size is not None
+ self.config = config
+
+ self.transformer = nn.ModuleDict()
+ self.transformer['wte'] = nn.Embedding(config.vocab_size, config.n_embd)
+
+ if config.embedding_type in ['learned', 'default']:
+ self.transformer['wpe'] = nn.Embedding(config.block_size, config.n_embd)
+ self.pos_emb = None
+ elif config.embedding_type == 'none':
+ self.transformer['wpe'] = None
+ self.pos_emb = None
+ else:
+ self.transformer['wpe'] = None
+ position = torch.arange(0, config.block_size)
+ pe = get_positional_encoding(position, config.n_embd, config.embedding_type, config.block_size)
+ self.register_buffer('pos_emb', pe)
+
+ self.transformer['drop'] = nn.Dropout(config.dropout)
+ self.transformer['h'] = nn.ModuleList([Block(config) for _ in range(config.n_layer)])
+ self.transformer['ln_f'] = LayerNorm(config.n_embd, bias=config.bias)
+ self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
+ self.transformer['wte'].weight = self.lm_head.weight # Weight tying
+
+ self.apply(self._init_weights)
+ for pn, p in self.named_parameters():
+ if pn.endswith('c_proj.weight'):
+ nn.init.normal_(p, mean=0.0, std=0.02 / math.sqrt(2 * config.n_layer))
+
+ # Initialize activations and attention patterns
+ self.activations = []
+ self.attention_patterns = []
+
+ print("Number of parameters: {:.2f}M".format(self.get_num_params() / 1e6))
+
+ def get_num_params(self, non_embedding=True):
+ n_params = sum(p.numel() for p in self.parameters())
+ if non_embedding and self.transformer['wpe'] is not None:
+ n_params -= self.transformer['wpe'].weight.numel()
+ return n_params
+
+ def _init_weights(self, module):
+ if isinstance(module, nn.Linear):
+ nn.init.normal_(module.weight, mean=0.0, std=0.02)
+ if module.bias is not None:
+ nn.init.zeros_(module.bias)
+ elif isinstance(module, nn.Embedding):
+ nn.init.normal_(module.weight, mean=0.0, std=0.02)
+
+ def forward(self, idx, targets=None):
+ device = idx.device
+ b, t = idx.size()
+ assert t <= self.config.block_size, f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
+ pos = torch.arange(0, t, dtype=torch.long, device=device) # shape (t)
+
+ tok_emb = self.transformer['wte'](idx) # token embeddings
+
+ if self.config.embedding_type in ['learned', 'default']:
+ pos_emb = self.transformer['wpe'](pos)
+ x = tok_emb + pos_emb
+ elif self.config.embedding_type == 'none':
+ x = tok_emb
+ else:
+ pos_emb = self.pos_emb[:t, :].to(device)
+ x = tok_emb + pos_emb.unsqueeze(0)
+
+ x = self.transformer['drop'](x)
+
+ # Reset activations and attention patterns if tracking
+ if self.config.track_activations and not self.training:
+ self.activations = []
+ if self.config.track_attention_patterns and not self.training:
+ self.attention_patterns = []
+
+ for block in self.transformer['h']:
+ x = block(x)
+ if self.config.track_activations and not self.training:
+ self.activations.append(x.detach().cpu())
+ if self.config.track_attention_patterns and not self.training:
+ if hasattr(block.attn, 'attn_weights'):
+ self.attention_patterns.append(block.attn.attn_weights)
+ x = self.transformer['ln_f'](x)
+ logits = self.lm_head(x)
+
+ if targets is not None:
+ loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1)
+ else:
+ loss = None
+
+ return logits, loss
+
+ def configure_optimizers(self, weight_decay, learning_rate, betas, device_type):
+ # Start with all candidate parameters
+ param_dict = {pn: p for pn, p in self.named_parameters() if p.requires_grad}
+ decay_params = [p for n, p in param_dict.items() if p.dim() >= 2]
+ nodecay_params = [p for n, p in param_dict.items() if p.dim() < 2]
+ optim_groups = [
+ {'params': decay_params, 'weight_decay': weight_decay},
+ {'params': nodecay_params, 'weight_decay': 0.0},
+ ]
+ fused_available = 'fused' in inspect.signature(torch.optim.AdamW).parameters
+ use_fused = fused_available and device_type == 'cuda'
+ extra_args = dict(fused=True) if use_fused else dict()
+ optimizer = torch.optim.AdamW(optim_groups, lr=learning_rate, betas=betas, **extra_args)
+ print(f"Using fused AdamW: {use_fused}")
+
+ return optimizer
+
+ def estimate_mfu(self, fwdbwd_per_iter, dt):
+ """Estimate model flops utilization (MFU)"""
+ N = self.get_num_params()
+ cfg = self.config
+ L, H, Q, T = cfg.n_layer, cfg.n_head, cfg.n_embd // cfg.n_head, cfg.block_size
+ flops_per_token = 6 * N + 12 * L * H * Q * T
+ flops_per_fwdbwd = flops_per_token * T
+ flops_per_iter = flops_per_fwdbwd * fwdbwd_per_iter
+ flops_achieved = flops_per_iter * (1.0 / dt)
+ flops_promised = 312e12 # A100 GPU bfloat16 peak flops is 312 TFLOPS
+ mfu = flops_achieved / flops_promised
+ return mfu
+
+ @torch.no_grad()
+ def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None):
+ """Generate sequences of tokens from the model"""
+ for _ in range(max_new_tokens):
+ idx_cond = idx if idx.size(1) <= self.config.block_size else idx[:, -self.config.block_size:]
+ logits, _ = self(idx_cond)
+ logits = logits[:, -1, :] / temperature
+ if top_k is not None:
+ v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
+ logits[logits < v[:, [-1]]] = -float('Inf')
+ probs = F.softmax(logits, dim=-1)
+ idx_next = torch.multinomial(probs, num_samples=1)
+ idx = torch.cat((idx, idx_next), dim=1)
+ return idx
diff --git a/onstart.sh b/onstart.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7af92b3361f37531d2d73b77a4f21e48dc66dddd
--- /dev/null
+++ b/onstart.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# This file is run on instance start. Output in /var/log/onstart.log
+
diff --git a/ports.log b/ports.log
new file mode 100644
index 0000000000000000000000000000000000000000..110f7310f3acdbcedf1508d2db31d12623f80b7e
--- /dev/null
+++ b/ports.log
@@ -0,0 +1 @@
+13928
diff --git a/prepare_evaluation_data.py b/prepare_evaluation_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9370e2ebb672b414f0803dd79373a69f09d8482
--- /dev/null
+++ b/prepare_evaluation_data.py
@@ -0,0 +1,97 @@
+# prepare_evaluation_data.py
+import os
+import requests
+import zipfile
+import tarfile
+from huggingface_hub import hf_hub_download
+
+def download_and_extract(url, extract_path):
+ filename = url.split('/')[-1]
+ if not os.path.exists(filename):
+ print(f"Downloading {filename}...")
+ r = requests.get(url)
+ with open(filename, 'wb') as f:
+ f.write(r.content)
+ else:
+ print(f"{filename} already exists.")
+
+ if filename.endswith('.zip'):
+ with zipfile.ZipFile(filename, 'r') as zip_ref:
+ zip_ref.extractall(extract_path)
+ elif filename.endswith(('.tar.gz', '.tgz')):
+ with tarfile.open(filename, 'r:gz') as tar_ref:
+ tar_ref.extractall(extract_path)
+ else:
+ print(f"Cannot extract {filename}.")
+
+def prepare_ptb():
+ url = 'https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.train.txt'
+ os.makedirs('data/ptb', exist_ok=True)
+ for split in ['train', 'valid', 'test']:
+ split_url = url.replace('train', split)
+ r = requests.get(split_url)
+ with open(f'data/ptb/{split}.txt', 'w') as f:
+ f.write(r.text)
+ print("PTB dataset prepared.")
+
+def prepare_wikitext2():
+ repo_id = "Salesforce/wikitext"
+ files = [
+ "wikitext-2-v1/train-00000-of-00001.parquet",
+ "wikitext-2-v1/validation-00000-of-00001.parquet",
+ "wikitext-2-v1/test-00000-of-00001.parquet"
+ ]
+ extract_path = 'data/'
+ os.makedirs(extract_path, exist_ok=True)
+
+ print("Downloading WikiText-2 dataset from Hugging Face...")
+ for file_path in files:
+        local_path = os.path.join(extract_path, file_path)  # hf_hub_download keeps the repo subpath under local_dir
+ if not os.path.exists(local_path):
+ hf_hub_download(repo_id=repo_id, filename=file_path, local_dir=extract_path, repo_type="dataset")
+ print(f"Downloaded {os.path.basename(file_path)} to {extract_path}.")
+ else:
+ print(f"{os.path.basename(file_path)} already exists in {extract_path}.")
+ print("WikiText-2 dataset preparation complete.")
+
+def prepare_wikitext103():
+ repo_id = "Salesforce/wikitext"
+ files = [
+ "wikitext-103-v1/train-00000-of-00002.parquet",
+ "wikitext-103-v1/train-00001-of-00002.parquet",
+ "wikitext-103-v1/validation-00000-of-00001.parquet",
+ "wikitext-103-v1/test-00000-of-00001.parquet"
+ ]
+ extract_path = 'data/'
+ os.makedirs(extract_path, exist_ok=True)
+
+ print("Downloading WikiText-103 dataset from Hugging Face...")
+ for file_path in files:
+        local_path = os.path.join(extract_path, file_path)  # hf_hub_download keeps the repo subpath under local_dir
+ if not os.path.exists(local_path):
+ hf_hub_download(repo_id=repo_id, filename=file_path, local_dir=extract_path, repo_type="dataset")
+ print(f"Downloaded {os.path.basename(file_path)} to {extract_path}.")
+ else:
+ print(f"{os.path.basename(file_path)} already exists in {extract_path}.")
+ print("WikiText-103 dataset preparation complete.")
+
+def prepare_lambada():
+ url = 'https://raw.githubusercontent.com/cybertronai/bflm/refs/heads/master/lambada_test.jsonl'
+ os.makedirs('data/lambada', exist_ok=True)
+ r = requests.get(url)
+ with open('data/lambada/lambada_test.jsonl', 'wb') as f:
+ f.write(r.content)
+ print("LAMBADA dataset prepared.")
+
+if __name__ == '__main__':
+ prepare_ptb()
+ prepare_wikitext2()
+ prepare_wikitext103()
+ prepare_lambada()
diff --git a/train.py b/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..e297c7a79dd2402ac4a2b6f62caf850f6350339f
--- /dev/null
+++ b/train.py
@@ -0,0 +1,546 @@
+# train.py
+# Example launch: torchrun --standalone --nproc_per_node=2 train.py --batch_size=96
+
+import os
+import time
+import math
+from contextlib import nullcontext
+import json
+
+import numpy as np
+import torch
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.distributed import init_process_group, destroy_process_group
+import pandas as pd
+
+import tiktoken
+from model import GPTConfig, GPT
+
+# Import wandb and tqdm
+import wandb
+from tqdm.auto import tqdm
+
+# -----------------------------------------------------------------------------
+# Default configuration with added positional encoding options
+# I/O
+out_dir = 'out'
+eval_interval = 100 # Evaluate every 100 iterations
+log_interval = 1 # Log every iteration
+eval_iters = 100
+eval_only = False
+always_save_checkpoint = True
+init_from = 'scratch' # 'scratch' | 'resume' | 'checkpoint'
+checkpoint_path = '' # Path to a specific checkpoint to load
+# wandb logging
+wandb_log = True
+wandb_project = 'gpt2_positional_encodings_10B'
+wandb_run_name = 'experiment'
+# data
+dataset = 'fineweb'
+gradient_accumulation_steps = 40
+batch_size = 12
+block_size = 512
+# model
+n_layer = 4
+n_head = 4
+n_embd = 256
+dropout = 0.0
+bias = False
+# adamw optimizer
+learning_rate = 6e-4
+max_iters = 10000
+weight_decay = 1e-1
+beta1 = 0.9
+beta2 = 0.95
+grad_clip = 1.0
+# learning rate decay settings
+decay_lr = True
+warmup_iters = 100
+lr_decay_iters = 10000
+min_lr = 6e-5
+# DDP settings
+backend = 'nccl'
+# system
+device = 'cuda'
+dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16'
+compile = True
+# Positional Encodings
+embedding_types = ['sinusoidal', 'polynomial_legendre',
+ 'polynomial_chebyshev']
+attention_types = ['default']
+# Data collection options
+collect_attention_patterns = False # Set to True to collect attention patterns
+collect_activations = False # Set to True to collect activations
+# Evaluation datasets
+eval_datasets = ['wikitext-103-v1', 'ptb', 'lambada'] # WikiText-103, Penn Treebank, and LAMBADA
+seed = 1337
+# -----------------------------------------------------------------------------
+config_keys = [k for k, v in globals().items() if not k.startswith('_') and isinstance(v, (int, float, bool, str, list, tuple))]
+exec(open('configurator.py').read())
+config = {k: globals()[k] for k in config_keys}
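+# Overrides are applied by configurator.py; a hypothetical invocation:
+#   python train.py --batch_size=24 --embedding_types="['sinusoidal']"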
+# -----------------------------------------------------------------------------
+
+def is_compatible(embedding_type, attention_type):
+ # Incompatible combinations can be specified here
+ incompatible_combinations = [
+        # add (embedding_type, attention_type) pairs here as needed
+ ]
+
+ # If embedding_type or attention_type is 'none', some attention methods may not function properly
+ if embedding_type == 'none' and attention_type in ['relative', 'rope']:
+ return False
+
+ # 'rope' attention requires even dimension per head
+ if attention_type == 'rope' and ((n_embd // n_head) % 2 != 0):
+ return False
+
+ return (embedding_type, attention_type) not in incompatible_combinations
+
+def main():
+ # Initialize DDP if needed
+ global gradient_accumulation_steps
+ ddp = int(os.environ.get('RANK', -1)) != -1
+ if ddp:
+ init_process_group(backend=backend)
+ ddp_rank = int(os.environ['RANK'])
+ ddp_local_rank = int(os.environ['LOCAL_RANK'])
+ ddp_world_size = int(os.environ['WORLD_SIZE'])
+ device_local = f'cuda:{ddp_local_rank}'
+ torch.cuda.set_device(device_local)
+ master_process = ddp_rank == 0
+ seed_offset = ddp_rank
+ assert gradient_accumulation_steps % ddp_world_size == 0
+ gradient_accumulation_steps //= ddp_world_size
+ else:
+ master_process = True
+ seed_offset = 0
+ ddp_world_size = 1
+ device_local = device # Use the default device
+
+ tokens_per_iter = gradient_accumulation_steps * ddp_world_size * batch_size * block_size
+ if master_process:
+ print(f"Tokens per iteration will be: {tokens_per_iter:,}")
+
+ if master_process:
+ os.makedirs(out_dir, exist_ok=True)
+
+ # Set random seed
+ global seed
+ seed += seed_offset
+ torch.manual_seed(seed)
+ np.random.seed(seed)
+ torch.backends.cuda.matmul.allow_tf32 = True
+ torch.backends.cudnn.allow_tf32 = True
+ device_type = 'cuda' if 'cuda' in device_local else 'cpu'
+ ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
+ ctx = nullcontext() if device_type == 'cpu' else torch.amp.autocast(device_type=device_type, dtype=ptdtype)
+
+ # Load tokenizer using tiktoken
+ tokenizer = tiktoken.get_encoding("gpt2")
+
+ # Prepare evaluation datasets
+ eval_data = {}
+ for eval_dataset in eval_datasets:
+ eval_data_path = os.path.join('data', eval_dataset)
+ if not os.path.exists(eval_data_path):
+ raise FileNotFoundError(f"Dataset {eval_dataset} not found. Please run prepare_evaluation_data.py first.")
+
+ if eval_dataset in ['wikitext-2-v1', 'wikitext-103-v1']:
+            # wikitext-103 shards its train split across two parquet files; read them all
+            train_files = sorted(f for f in os.listdir(eval_data_path) if f.startswith('train'))
+            val_file = [f for f in os.listdir(eval_data_path) if f.startswith('validation')][0]
+
+            train_df = pd.concat(pd.read_parquet(os.path.join(eval_data_path, f)) for f in train_files)
+            val_df = pd.read_parquet(os.path.join(eval_data_path, val_file))
+
+ train_text = '\n'.join(train_df['text'])
+ val_text = '\n'.join(val_df['text'])
+
+ elif eval_dataset == 'ptb':
+ with open(os.path.join(eval_data_path, 'train.txt'), 'r') as f:
+ train_text = f.read()
+ with open(os.path.join(eval_data_path, 'valid.txt'), 'r') as f:
+ val_text = f.read()
+
+ elif eval_dataset == 'lambada':
+ with open(os.path.join(eval_data_path, 'lambada_test.jsonl'), 'r') as f:
+ data = [json.loads(line) for line in f]
+ test_text = '\n'.join([item['text'] for item in data])
+ train_text = test_text[:len(test_text)//2] # Use first half as pseudo-train
+ val_text = test_text[len(test_text)//2:] # Use second half as pseudo-val
+
+ else:
+ raise ValueError(f"Unknown dataset: {eval_dataset}")
+
+ # Tokenize
+ train_ids = tokenizer.encode_ordinary(train_text)
+ val_ids = tokenizer.encode_ordinary(val_text)
+
+ # Convert to numpy arrays
+ train_ids = np.array(train_ids, dtype=np.uint16)
+ val_ids = np.array(val_ids, dtype=np.uint16)
+
+ eval_data[eval_dataset] = {'train': train_ids, 'val': val_ids}
+
+ # Data loading
+ data_dir = os.path.join('data', dataset)
+    # get_batch serves both the main training set and the held-out evaluation sets
+ def get_batch(split, dataset='main'):
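+        # np.memmap is recreated on every call; reusing a single memmap across
+        # many random reads is a known source of unbounded memory growth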
+ if dataset == 'main':
+ if split == 'train':
+ data = np.memmap(os.path.join(data_dir, 'train.bin'), dtype=np.uint16, mode='r')
+ else:
+ data = np.memmap(os.path.join(data_dir, 'val.bin'), dtype=np.uint16, mode='r')
+ else:
+ data = eval_data[dataset][split]
+
+ ix = torch.randint(len(data) - block_size, (batch_size,))
+ x = torch.stack([torch.from_numpy((data[i:i+block_size]).astype(np.int64)) for i in ix])
+ y = torch.stack([torch.from_numpy((data[i+1:i+1+block_size]).astype(np.int64)) for i in ix])
+ if device_type == 'cuda':
+ x, y = x.pin_memory().to(device_local, non_blocking=True), y.pin_memory().to(device_local, non_blocking=True)
+ else:
+ x, y = x.to(device_local), y.to(device_local)
+ return x, y
+
+ # Attempt to derive vocab_size from the dataset
+ meta_path = os.path.join(data_dir, 'meta.json')
+ meta_vocab_size = None
+ if os.path.exists(meta_path):
+ with open(meta_path, 'r') as f:
+ meta = json.load(f)
+ meta_vocab_size = meta['vocab_size']
+ if master_process:
+ print(f"Found vocab_size = {meta_vocab_size} (inside {meta_path})")
+
+    # Estimates loss and optionally collects attention patterns and activations
+ @torch.no_grad()
+ def estimate_loss(model, collect_attention_patterns=False, collect_activations=False, save_dir=None, max_batches_to_save=None):
+ out = {}
+ model.eval()
+ # Access the underlying model if wrapped with DDP
+ raw_model = model.module if hasattr(model, 'module') else model
+
+ # Set tracking flags on the underlying model
+ raw_model.config.track_attention_patterns = collect_attention_patterns
+ raw_model.config.track_activations = collect_activations
+
+ if collect_attention_patterns or collect_activations:
+ if save_dir is None:
+ raise ValueError("save_dir must be specified when collecting attention patterns or activations.")
+ if master_process:
+ os.makedirs(save_dir, exist_ok=True)
+
+ for split in ['train', 'val']:
+ losses = torch.zeros(eval_iters)
+ save_count = 0 # Counter for saved batches
+ for k in range(eval_iters):
+ X, Y = get_batch(split)
+ with ctx:
+ logits, loss = model(X, Y)
+ losses[k] = loss.item()
+ # Collect and save attention patterns and activations
+                if (collect_attention_patterns or collect_activations) and save_count < (max_batches_to_save or eval_iters):
+                    if master_process:
+                        batch_dir = os.path.join(save_dir, f"{split}_batch_{k}")
+                        os.makedirs(batch_dir, exist_ok=True)
+                        # Save activations
+                        if collect_activations and hasattr(raw_model, 'activations'):
+                            for idx, activation in enumerate(raw_model.activations):
+                                activation_path = os.path.join(batch_dir, f"activation_layer_{idx}.pt")
+                                torch.save(activation, activation_path)
+                        # Save attention patterns
+                        if collect_attention_patterns and hasattr(raw_model, 'attention_patterns'):
+                            for idx, attention in enumerate(raw_model.attention_patterns):
+                                attention_path = os.path.join(batch_dir, f"attention_layer_{idx}.pt")
+                                torch.save(attention, attention_path)
+                    # Clear tracked tensors on every rank, not just master
+                    raw_model.activations = []
+                    raw_model.attention_patterns = []
+                    save_count += 1
+ out[split] = losses.mean().item()
+
+ # Evaluate on additional datasets
+ for eval_dataset in eval_datasets:
+ split_losses = {}
+ for split in ['train', 'val']:
+ losses = torch.zeros(eval_iters)
+ save_count = 0 # Counter for saved batches
+ for k in range(eval_iters):
+ X, Y = get_batch(split, dataset=eval_dataset)
+ with ctx:
+ logits, loss = model(X, Y)
+ losses[k] = loss.item()
+ # Collect and save attention patterns and activations
+                    if (collect_attention_patterns or collect_activations) and save_count < (max_batches_to_save or eval_iters):
+                        if master_process:
+                            batch_dir = os.path.join(save_dir, f"{eval_dataset}_{split}_batch_{k}")
+                            os.makedirs(batch_dir, exist_ok=True)
+                            # Save activations
+                            if collect_activations and hasattr(raw_model, 'activations'):
+                                for idx, activation in enumerate(raw_model.activations):
+                                    activation_path = os.path.join(batch_dir, f"activation_layer_{idx}.pt")
+                                    torch.save(activation, activation_path)
+                            # Save attention patterns
+                            if collect_attention_patterns and hasattr(raw_model, 'attention_patterns'):
+                                for idx, attention in enumerate(raw_model.attention_patterns):
+                                    attention_path = os.path.join(batch_dir, f"attention_layer_{idx}.pt")
+                                    torch.save(attention, attention_path)
+                        # Clear tracked tensors on every rank, not just master
+                        raw_model.activations = []
+                        raw_model.attention_patterns = []
+                        save_count += 1
+ split_losses[split] = losses.mean().item()
+ out[eval_dataset] = split_losses
+ model.train()
+ # Reset tracking flags
+ raw_model.config.track_attention_patterns = False
+ raw_model.config.track_activations = False
+ return out
+
+ # Learning rate decay scheduler
+ def get_lr(it):
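+        # linear warmup to learning_rate over warmup_iters, then cosine decay
+        # to min_lr by lr_decay_iters (with the defaults: peak at iter 100,
+        # floor of 6e-5 from iter 10000 on)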
+ if it < warmup_iters:
+ return learning_rate * it / warmup_iters
+ if it > lr_decay_iters:
+ return min_lr
+ decay_ratio = (it - warmup_iters) / (lr_decay_iters - warmup_iters)
+ coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio))
+ return min_lr + coeff * (learning_rate - min_lr)
+
+ # Training loop over positional encoding combinations
+ for embedding_type in embedding_types:
+ for attention_type in attention_types:
+ if not is_compatible(embedding_type, attention_type):
+ if master_process:
+ print(f"Skipping incompatible combination: Embedding={embedding_type}, Attention={attention_type}")
+ continue
+
+ # Configure model arguments
+ model_args = dict(
+ n_layer=n_layer,
+ n_head=n_head,
+ n_embd=n_embd,
+ block_size=block_size,
+ bias=bias,
+ vocab_size=None,
+ dropout=dropout,
+ embedding_type=embedding_type,
+ attention_type=attention_type,
+ track_activations=False,
+ track_attention_patterns=False,
+ )
+
+ # Initialize or resume model
+ iter_num = 0
+ best_val_loss = 1e9 # initialize best val loss to a high value
+ checkpoint = None
+ run_id = None # Initialize run_id to None
+
+ if init_from == 'scratch':
+ if master_process:
+ print(f"\nInitializing new model with embedding_type={embedding_type}, attention_type={attention_type}")
+ if meta_vocab_size is None:
+ if master_process:
+ print("Defaulting to vocab_size of GPT-2 to 50257")
+ model_args['vocab_size'] = meta_vocab_size if meta_vocab_size is not None else 50257
+ gptconf = GPTConfig(**model_args)
+ model = GPT(gptconf)
+ elif init_from == 'resume':
+ # Resume from the latest checkpoint
+ ckpt_path = os.path.join(out_dir, f"ckpt_{embedding_type}_{attention_type}.pt")
+ if not os.path.exists(ckpt_path):
+ raise FileNotFoundError(f"Checkpoint not found at {ckpt_path}")
+ if master_process:
+ print(f"\nResuming training from checkpoint {ckpt_path}")
+ checkpoint = torch.load(ckpt_path, map_location=device_local)
+ gptconf = GPTConfig(**checkpoint['model_args'])
+ model = GPT(gptconf)
+ model.load_state_dict(checkpoint['model'])
+ iter_num = checkpoint['iter_num']
+ best_val_loss = checkpoint['best_val_loss']
+ seed = checkpoint.get('seed', seed)
+ run_id = checkpoint.get('wandb_run_id', None)
+ elif init_from == 'checkpoint':
+ # Resume from a specific checkpoint
+ if not checkpoint_path or not os.path.exists(checkpoint_path):
+ raise FileNotFoundError(f"Checkpoint not found at {checkpoint_path}")
+ if master_process:
+ print(f"\nLoading model from checkpoint {checkpoint_path}")
+ checkpoint = torch.load(checkpoint_path, map_location=device_local)
+ gptconf = GPTConfig(**checkpoint['model_args'])
+ model = GPT(gptconf)
+ model.load_state_dict(checkpoint['model'])
+ iter_num = checkpoint['iter_num']
+ best_val_loss = checkpoint['best_val_loss']
+ seed = checkpoint.get('seed', seed)
+ run_id = checkpoint.get('wandb_run_id', None)
+ else:
+ raise ValueError(f"Unknown init_from '{init_from}'")
+
+ # Set random seed
+ seed += seed_offset
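+            # seed_offset is re-applied here because a resumed checkpoint
+            # restores the un-offset seed; ranks therefore stay distinct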
+ torch.manual_seed(seed)
+ np.random.seed(seed)
+
+ model.to(device_local)
+ scaler = torch.cuda.amp.GradScaler(enabled=(dtype == 'float16'))
+ optimizer = model.configure_optimizers(weight_decay, learning_rate, (beta1, beta2), device_type)
+
+ # Load optimizer state if resuming
+ if checkpoint is not None:
+ optimizer.load_state_dict(checkpoint['optimizer'])
+
+ if compile:
+ if master_process:
+ print("Compiling the model... (takes a ~minute)")
+ unoptimized_model = model
+ model = torch.compile(model)
+
+ if ddp:
+ model = DDP(model, device_ids=[ddp_local_rank])
+
+ # Logging with WandB
+ if wandb_log and master_process:
+ run_name = f"{embedding_type}_{attention_type}_{wandb_run_name}"
+ # Initialize WandB
+ wandb.init(project=wandb_project, name=run_name, config=config, resume='allow', id=run_id)
+ # Save the run ID for resuming later
+ run_id = wandb.run.id
+ else:
+ run_id = None
+
+ # Training loop
+ X, Y = get_batch('train')
+ t0 = time.time()
+ local_iter_num = 0
+ raw_model = model.module if hasattr(model, 'module') else model
+ running_mfu = -1.0
+ progress_bar = tqdm(total=max_iters, initial=iter_num, desc=f"Training {embedding_type} + {attention_type}", disable=not master_process)
+ progress_bar_update_freq = 1 # Update progress bar every iteration
+
+ while True:
+ # Determine learning rate
+ lr = get_lr(iter_num) if decay_lr else learning_rate
+ for param_group in optimizer.param_groups:
+ param_group['lr'] = lr
+
+ # Evaluate and checkpoint
+                if iter_num % eval_interval == 0 and (iter_num > 0 or eval_only):
+ # Define save_dir for collected data
+ eval_data_dir = os.path.join('data', 'eval_data', f"{embedding_type}_{attention_type}", f"step_{iter_num}")
+ # Set a limit on the number of batches to save during evaluation
+ max_batches_to_save = 10 # Adjust this number as needed to control storage usage
+ losses = estimate_loss(model,
+ collect_attention_patterns=collect_attention_patterns,
+ collect_activations=collect_activations,
+ save_dir=eval_data_dir,
+ max_batches_to_save=max_batches_to_save)
+ if master_process:
+ print(f"\nStep {iter_num}:")
+ print(f"Train loss: {losses['train']:.4f}, Val loss: {losses['val']:.4f}")
+ for eval_dataset in eval_datasets:
+ print(f"{eval_dataset} - Train loss: {losses[eval_dataset]['train']:.4f}, Val loss: {losses[eval_dataset]['val']:.4f}")
+ # Log to wandb
+ if wandb_log:
+ wandb_metrics = {
+ "iter": iter_num,
+ "train/loss": losses['train'],
+ "val/loss": losses['val'],
+ "lr": lr,
+ "mfu": running_mfu * 100,
+ }
+ for eval_dataset in eval_datasets:
+ wandb_metrics[f"{eval_dataset}/train_loss"] = losses[eval_dataset]['train']
+ wandb_metrics[f"{eval_dataset}/val_loss"] = losses[eval_dataset]['val']
+ wandb.log(wandb_metrics, step=iter_num)
+                    if losses['val'] < best_val_loss or always_save_checkpoint:
+                        best_val_loss = min(best_val_loss, losses['val'])
+ if iter_num > 0:
+ checkpoint = {
+ 'model': raw_model.state_dict(),
+ 'optimizer': optimizer.state_dict(),
+ 'model_args': model_args,
+ 'iter_num': iter_num,
+ 'best_val_loss': best_val_loss,
+ 'config': config,
+ 'seed': seed,
+ 'wandb_run_id': run_id
+ }
+ ckpt_path = os.path.join(out_dir, f"ckpt_{embedding_type}_{attention_type}.pt")
+ if master_process:
+ print(f"Saving checkpoint to {ckpt_path}")
+ torch.save(checkpoint, ckpt_path)
+ # Update progress bar postfix
+ if master_process:
+ postfix_dict = {
+ 'train_loss': f"{losses['train']:.4f}",
+ 'val_loss': f"{losses['val']:.4f}"
+ }
+ for eval_dataset in eval_datasets:
+ postfix_dict[f"{eval_dataset}_val_loss"] = f"{losses[eval_dataset]['val']:.4f}"
+ progress_bar.set_postfix(postfix_dict)
+
+ if eval_only:
+ break
+
+ # Forward backward update
+ for micro_step in range(gradient_accumulation_steps):
+ if ddp:
+ model.require_backward_grad_sync = (micro_step == gradient_accumulation_steps - 1)
+ with ctx:
+ logits, loss = model(X, Y)
+ loss = loss / gradient_accumulation_steps
+ X, Y = get_batch('train')
+ scaler.scale(loss).backward()
+ if grad_clip != 0.0:
+ scaler.unscale_(optimizer)
+ torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
+ scaler.step(optimizer)
+ scaler.update()
+ optimizer.zero_grad(set_to_none=True)
+
+ # Logging
+ t1 = time.time()
+ dt = t1 - t0
+ t0 = t1
+ if iter_num % log_interval == 0:
+ lossf = loss.item() * gradient_accumulation_steps
+ if local_iter_num >= 5:
+ mfu = raw_model.estimate_mfu(batch_size * gradient_accumulation_steps, dt)
+ running_mfu = mfu if running_mfu == -1.0 else 0.9 * running_mfu + 0.1 * mfu
+ if master_process:
+ progress_bar.set_postfix({
+ 'loss': f"{lossf:.4f}",
+ 'lr': f"{lr:.2e}",
+ 'mfu': f"{running_mfu*100:.2f}%",
+ 'time_per_iter_ms': f"{dt * 1000:.2f}ms",
+ })
+ if wandb_log:
+ wandb.log({
+ "iter": iter_num,
+ "train/loss": lossf,
+ "lr": lr,
+ "mfu": running_mfu * 100,
+ "time_per_iter_ms": dt * 1000,
+ }, step=iter_num)
+ iter_num += 1
+ local_iter_num += 1
+ if master_process:
+ progress_bar.update(progress_bar_update_freq)
+ # Termination conditions
+ if iter_num > max_iters:
+ break
+
+ if master_process:
+ progress_bar.close()
+ if wandb_log and master_process:
+ wandb.finish()
+
+ # Destroy the process group after all models have been trained
+ if ddp:
+ destroy_process_group()
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file