diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a4352a20389838a178e8ad7ec30f479cae1b09d8
--- /dev/null
+++ b/README.md
@@ -0,0 +1,92 @@
+---
+language: en
+thumbnail:
+license: mit
+tags:
+- question-answering
+- bert
+- bert-base
+datasets:
+- squad
+metrics:
+- squad
+widget:
+- text: "Where is the Eiffel Tower located?"
+ context: "The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France. It is named after the engineer Gustave Eiffel, whose company designed and built the tower."
+- text: "Who is Frederic Chopin?"
+ context: "Frédéric François Chopin, born Fryderyk Franciszek Chopin (1 March 1810 – 17 October 1849), was a Polish composer and virtuoso pianist of the Romantic era who wrote primarily for solo piano."
+---
+
+## BERT-base uncased model fine-tuned on SQuAD v1
+
+This model is block sparse: the **linear** layers contain **7.5%** of the original weights.
+
+
+The model contains **28.2%** of the original weights **overall**.
+
+The training uses a modified version of Victor Sanh's [Movement Pruning](https://arxiv.org/abs/2005.07683) method.
+
+That means that with the [block-sparse](https://github.com/huggingface/pytorch_block_sparse) runtime it ran **1.92x** faster than a dense network on the evaluation, at the price of some impact on the accuracy (see below).
+
+
+
+This model was fine-tuned from the HuggingFace [BERT](https://www.aclweb.org/anthology/N19-1423/) base uncased checkpoint on [SQuAD1.1](https://rajpurkar.github.io/SQuAD-explorer), and distilled from the equivalent model [csarron/bert-base-uncased-squad-v1](https://huggingface.co/csarron/bert-base-uncased-squad-v1).
+This model is case-insensitive: it does not make a difference between english and English.
+
+## Pruning details
+A side-effect of the block pruning is that some of the attention heads are completely removed: 106 heads were removed out of a total of 144 (73.6%).
+
+Here is a detailed view of how the remaining heads are distributed in the network after pruning.
+
+![Pruning details](https://huggingface.co/madlag/bert-base-uncased-squad1.1-block-sparse-0.07-v1/raw/main/model_card/pruning.svg)
+
+## Density plot
+
+
+
+## Details
+
+| Dataset | Split | # samples |
+| -------- | ----- | --------- |
+| SQuAD1.1 | train | 90.6K |
+| SQuAD1.1 | eval | 11.1K |
+
+### Fine-tuning
+- Python: `3.8.5`
+
+- Machine specs:
+
+```CPU: Intel(R) Core(TM) i7-6700K CPU
+Memory: 64 GiB
+GPUs: 1 GeForce RTX 3090, with 24GiB memory
+GPU driver: 455.23.05, CUDA: 11.1
+```
+
+
+### Results
+
+**PyTorch model file size**: `335M` (original BERT: `438M`)
+
+| Metric | # Value | # Original ([Table 2](https://www.aclweb.org/anthology/N19-1423.pdf))|
+| ------ | --------- | --------- |
+| **EM** | **71.88** | **80.8** |
+| **F1** | **81.36** | **88.5** |
+
+## Example Usage
+
+```python
+from transformers import pipeline
+
+qa_pipeline = pipeline(
+ "question-answering",
+ model="madlag/bert-base-uncased-squad1.1-block-sparse-0.07-v1",
+ tokenizer="madlag/bert-base-uncased-squad1.1-block-sparse-0.07-v1"
+)
+
+predictions = qa_pipeline({
+ 'context': "Frédéric François Chopin, born Fryderyk Franciszek Chopin (1 March 1810 – 17 October 1849), was a Polish composer and virtuoso pianist of the Romantic era who wrote primarily for solo piano.",
+ 'question': "Who is Frederic Chopin?",
+})
+
+print(predictions)
+```
\ No newline at end of file
diff --git a/model_card/layer_images/layer_0_attention_output_dense.png b/model_card/layer_images/layer_0_attention_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..5542d2abb6b634b586d8c423ca6d8a730edbcb35
Binary files /dev/null and b/model_card/layer_images/layer_0_attention_output_dense.png differ
diff --git a/model_card/layer_images/layer_0_attention_self_key.png b/model_card/layer_images/layer_0_attention_self_key.png
new file mode 100644
index 0000000000000000000000000000000000000000..083d82e2261c15136e930a1eddcb3d308760393b
Binary files /dev/null and b/model_card/layer_images/layer_0_attention_self_key.png differ
diff --git a/model_card/layer_images/layer_0_attention_self_query.png b/model_card/layer_images/layer_0_attention_self_query.png
new file mode 100644
index 0000000000000000000000000000000000000000..ef98a72b371f1370fba836882f260289c5ddeda8
Binary files /dev/null and b/model_card/layer_images/layer_0_attention_self_query.png differ
diff --git a/model_card/layer_images/layer_0_attention_self_value.png b/model_card/layer_images/layer_0_attention_self_value.png
new file mode 100644
index 0000000000000000000000000000000000000000..7a0d52dddc228ad3ccda0be5df777dd3a2950b65
Binary files /dev/null and b/model_card/layer_images/layer_0_attention_self_value.png differ
diff --git a/model_card/layer_images/layer_0_intermediate_dense.png b/model_card/layer_images/layer_0_intermediate_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..d2cc4105c914e6806033d1a7d43f9dc9d46ed920
Binary files /dev/null and b/model_card/layer_images/layer_0_intermediate_dense.png differ
diff --git a/model_card/layer_images/layer_0_output_dense.png b/model_card/layer_images/layer_0_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..cadf13e767f7b37252f8df06724b4e6918b58517
Binary files /dev/null and b/model_card/layer_images/layer_0_output_dense.png differ
diff --git a/model_card/layer_images/layer_10_attention_output_dense.png b/model_card/layer_images/layer_10_attention_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..2beb1e6a675ad28e61a68d65cb38deb974def2b7
Binary files /dev/null and b/model_card/layer_images/layer_10_attention_output_dense.png differ
diff --git a/model_card/layer_images/layer_10_attention_self_key.png b/model_card/layer_images/layer_10_attention_self_key.png
new file mode 100644
index 0000000000000000000000000000000000000000..6719d2c06ba5630d302ae27c6f5c0eabea37aff3
Binary files /dev/null and b/model_card/layer_images/layer_10_attention_self_key.png differ
diff --git a/model_card/layer_images/layer_10_attention_self_query.png b/model_card/layer_images/layer_10_attention_self_query.png
new file mode 100644
index 0000000000000000000000000000000000000000..a899eeb66f58419e0c74bf54a03352dfe1eff92d
Binary files /dev/null and b/model_card/layer_images/layer_10_attention_self_query.png differ
diff --git a/model_card/layer_images/layer_10_attention_self_value.png b/model_card/layer_images/layer_10_attention_self_value.png
new file mode 100644
index 0000000000000000000000000000000000000000..fbb371249d798290f1a22fe67db7831eb1a804cb
Binary files /dev/null and b/model_card/layer_images/layer_10_attention_self_value.png differ
diff --git a/model_card/layer_images/layer_10_intermediate_dense.png b/model_card/layer_images/layer_10_intermediate_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..f750c2c3086a60deb876c72d1218452c4e1d2cd1
Binary files /dev/null and b/model_card/layer_images/layer_10_intermediate_dense.png differ
diff --git a/model_card/layer_images/layer_10_output_dense.png b/model_card/layer_images/layer_10_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..742ca056ca3a7fa3d3429f8a0a8accdf9c578de8
Binary files /dev/null and b/model_card/layer_images/layer_10_output_dense.png differ
diff --git a/model_card/layer_images/layer_11_attention_output_dense.png b/model_card/layer_images/layer_11_attention_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..d940c82133dfcb0f4f77edefad9c7fa75d919f65
Binary files /dev/null and b/model_card/layer_images/layer_11_attention_output_dense.png differ
diff --git a/model_card/layer_images/layer_11_attention_self_key.png b/model_card/layer_images/layer_11_attention_self_key.png
new file mode 100644
index 0000000000000000000000000000000000000000..e20616ea539eae802fdb2668fbcd3debab99914b
Binary files /dev/null and b/model_card/layer_images/layer_11_attention_self_key.png differ
diff --git a/model_card/layer_images/layer_11_attention_self_query.png b/model_card/layer_images/layer_11_attention_self_query.png
new file mode 100644
index 0000000000000000000000000000000000000000..f05277824fe8b9d2e778552243f19805d36c6b16
Binary files /dev/null and b/model_card/layer_images/layer_11_attention_self_query.png differ
diff --git a/model_card/layer_images/layer_11_attention_self_value.png b/model_card/layer_images/layer_11_attention_self_value.png
new file mode 100644
index 0000000000000000000000000000000000000000..05023ca12f32c3c6f84e9a6cf68c68643ad56620
Binary files /dev/null and b/model_card/layer_images/layer_11_attention_self_value.png differ
diff --git a/model_card/layer_images/layer_11_intermediate_dense.png b/model_card/layer_images/layer_11_intermediate_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..12824cb636d1164cd6c994239f0e4fbcf90fc904
Binary files /dev/null and b/model_card/layer_images/layer_11_intermediate_dense.png differ
diff --git a/model_card/layer_images/layer_11_output_dense.png b/model_card/layer_images/layer_11_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..cb3cf5077d208b1573215d3c206b4a55c1fe9719
Binary files /dev/null and b/model_card/layer_images/layer_11_output_dense.png differ
diff --git a/model_card/layer_images/layer_1_attention_output_dense.png b/model_card/layer_images/layer_1_attention_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..07f29296c4c7342e54e42313fe09b7d8f5b36268
Binary files /dev/null and b/model_card/layer_images/layer_1_attention_output_dense.png differ
diff --git a/model_card/layer_images/layer_1_attention_self_key.png b/model_card/layer_images/layer_1_attention_self_key.png
new file mode 100644
index 0000000000000000000000000000000000000000..11a9d9341669b1deda3370396253b875f545330b
Binary files /dev/null and b/model_card/layer_images/layer_1_attention_self_key.png differ
diff --git a/model_card/layer_images/layer_1_attention_self_query.png b/model_card/layer_images/layer_1_attention_self_query.png
new file mode 100644
index 0000000000000000000000000000000000000000..742020a025d74f7f5afe70463079d6c2af97abe0
Binary files /dev/null and b/model_card/layer_images/layer_1_attention_self_query.png differ
diff --git a/model_card/layer_images/layer_1_attention_self_value.png b/model_card/layer_images/layer_1_attention_self_value.png
new file mode 100644
index 0000000000000000000000000000000000000000..7a03f304dc9333ace6275415c9613b61c3039554
Binary files /dev/null and b/model_card/layer_images/layer_1_attention_self_value.png differ
diff --git a/model_card/layer_images/layer_1_intermediate_dense.png b/model_card/layer_images/layer_1_intermediate_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..4a34a6bd7af70297587501a2d12f5dd609edd858
Binary files /dev/null and b/model_card/layer_images/layer_1_intermediate_dense.png differ
diff --git a/model_card/layer_images/layer_1_output_dense.png b/model_card/layer_images/layer_1_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..1c9610ea242bf2a2cd6310ee7a5ff9827c852f62
Binary files /dev/null and b/model_card/layer_images/layer_1_output_dense.png differ
diff --git a/model_card/layer_images/layer_2_attention_output_dense.png b/model_card/layer_images/layer_2_attention_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..f047b23c7cb80511ee15a4f83886565680275127
Binary files /dev/null and b/model_card/layer_images/layer_2_attention_output_dense.png differ
diff --git a/model_card/layer_images/layer_2_attention_self_key.png b/model_card/layer_images/layer_2_attention_self_key.png
new file mode 100644
index 0000000000000000000000000000000000000000..053ed60f5876d020afa9963ec12d12b3e410cf6c
Binary files /dev/null and b/model_card/layer_images/layer_2_attention_self_key.png differ
diff --git a/model_card/layer_images/layer_2_attention_self_query.png b/model_card/layer_images/layer_2_attention_self_query.png
new file mode 100644
index 0000000000000000000000000000000000000000..b69f2acf4840bf50d650c13060d4bd09e3d44279
Binary files /dev/null and b/model_card/layer_images/layer_2_attention_self_query.png differ
diff --git a/model_card/layer_images/layer_2_attention_self_value.png b/model_card/layer_images/layer_2_attention_self_value.png
new file mode 100644
index 0000000000000000000000000000000000000000..896fe9e2a84f25eae11638d240070d2f9c8ba3ae
Binary files /dev/null and b/model_card/layer_images/layer_2_attention_self_value.png differ
diff --git a/model_card/layer_images/layer_2_intermediate_dense.png b/model_card/layer_images/layer_2_intermediate_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..25eb54c0bffaca16ca29919802a25e701dcd7772
Binary files /dev/null and b/model_card/layer_images/layer_2_intermediate_dense.png differ
diff --git a/model_card/layer_images/layer_2_output_dense.png b/model_card/layer_images/layer_2_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..a0f2d18bf4343950e8bb82317988796cd89dceb1
Binary files /dev/null and b/model_card/layer_images/layer_2_output_dense.png differ
diff --git a/model_card/layer_images/layer_3_attention_output_dense.png b/model_card/layer_images/layer_3_attention_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..880c018483e6d018da26afccbc2fbe8c342b7f28
Binary files /dev/null and b/model_card/layer_images/layer_3_attention_output_dense.png differ
diff --git a/model_card/layer_images/layer_3_attention_self_key.png b/model_card/layer_images/layer_3_attention_self_key.png
new file mode 100644
index 0000000000000000000000000000000000000000..3d5b3afa2afb77f80c4314fcf66ddc87fecc81c5
Binary files /dev/null and b/model_card/layer_images/layer_3_attention_self_key.png differ
diff --git a/model_card/layer_images/layer_3_attention_self_query.png b/model_card/layer_images/layer_3_attention_self_query.png
new file mode 100644
index 0000000000000000000000000000000000000000..12e8826d8e3f2853424a3a7d0e100877cf13f4f9
Binary files /dev/null and b/model_card/layer_images/layer_3_attention_self_query.png differ
diff --git a/model_card/layer_images/layer_3_attention_self_value.png b/model_card/layer_images/layer_3_attention_self_value.png
new file mode 100644
index 0000000000000000000000000000000000000000..f5398fc1737ca8285c8fd61447eb18a9b2929668
Binary files /dev/null and b/model_card/layer_images/layer_3_attention_self_value.png differ
diff --git a/model_card/layer_images/layer_3_intermediate_dense.png b/model_card/layer_images/layer_3_intermediate_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..fda4707ccbd89f10094ac428c972709294ec6d0d
Binary files /dev/null and b/model_card/layer_images/layer_3_intermediate_dense.png differ
diff --git a/model_card/layer_images/layer_3_output_dense.png b/model_card/layer_images/layer_3_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..640fdcf95a74de34185e28646863bbb9a07b379a
Binary files /dev/null and b/model_card/layer_images/layer_3_output_dense.png differ
diff --git a/model_card/layer_images/layer_4_attention_output_dense.png b/model_card/layer_images/layer_4_attention_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..b3352a845957c1f60b921a51dedef4041666a908
Binary files /dev/null and b/model_card/layer_images/layer_4_attention_output_dense.png differ
diff --git a/model_card/layer_images/layer_4_attention_self_key.png b/model_card/layer_images/layer_4_attention_self_key.png
new file mode 100644
index 0000000000000000000000000000000000000000..35d08f3d6a17dec3e62d4635716673d4182f9b79
Binary files /dev/null and b/model_card/layer_images/layer_4_attention_self_key.png differ
diff --git a/model_card/layer_images/layer_4_attention_self_query.png b/model_card/layer_images/layer_4_attention_self_query.png
new file mode 100644
index 0000000000000000000000000000000000000000..7bc934db00decec909f88c11c962292ff85dd149
Binary files /dev/null and b/model_card/layer_images/layer_4_attention_self_query.png differ
diff --git a/model_card/layer_images/layer_4_attention_self_value.png b/model_card/layer_images/layer_4_attention_self_value.png
new file mode 100644
index 0000000000000000000000000000000000000000..afd2252f0419791c97f4b4298de49b1a15990f33
Binary files /dev/null and b/model_card/layer_images/layer_4_attention_self_value.png differ
diff --git a/model_card/layer_images/layer_4_intermediate_dense.png b/model_card/layer_images/layer_4_intermediate_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..ff1f9a7f97fa367241ab72582d1813adcd625ad3
Binary files /dev/null and b/model_card/layer_images/layer_4_intermediate_dense.png differ
diff --git a/model_card/layer_images/layer_4_output_dense.png b/model_card/layer_images/layer_4_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..510b70e53b1f42b0cb73736dcf94358efba95194
Binary files /dev/null and b/model_card/layer_images/layer_4_output_dense.png differ
diff --git a/model_card/layer_images/layer_5_attention_output_dense.png b/model_card/layer_images/layer_5_attention_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..e2fb63949fcb1d11da0950c866ae8c6dd8dc770c
Binary files /dev/null and b/model_card/layer_images/layer_5_attention_output_dense.png differ
diff --git a/model_card/layer_images/layer_5_attention_self_key.png b/model_card/layer_images/layer_5_attention_self_key.png
new file mode 100644
index 0000000000000000000000000000000000000000..6eca4a533ea8e1e4dbee7367b3ea415a9c9e75b0
Binary files /dev/null and b/model_card/layer_images/layer_5_attention_self_key.png differ
diff --git a/model_card/layer_images/layer_5_attention_self_query.png b/model_card/layer_images/layer_5_attention_self_query.png
new file mode 100644
index 0000000000000000000000000000000000000000..3ea8cdc8ce64887146d00f61551a7cac111a3ffd
Binary files /dev/null and b/model_card/layer_images/layer_5_attention_self_query.png differ
diff --git a/model_card/layer_images/layer_5_attention_self_value.png b/model_card/layer_images/layer_5_attention_self_value.png
new file mode 100644
index 0000000000000000000000000000000000000000..c700403adfa766eb02d4ef362bbab38a21be90b0
Binary files /dev/null and b/model_card/layer_images/layer_5_attention_self_value.png differ
diff --git a/model_card/layer_images/layer_5_intermediate_dense.png b/model_card/layer_images/layer_5_intermediate_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..f3efe38e22669260b5df8abd00b5321d27468e25
Binary files /dev/null and b/model_card/layer_images/layer_5_intermediate_dense.png differ
diff --git a/model_card/layer_images/layer_5_output_dense.png b/model_card/layer_images/layer_5_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..4b30b1c7eec1c0fe5224f0d9dfd593f2648f7f2a
Binary files /dev/null and b/model_card/layer_images/layer_5_output_dense.png differ
diff --git a/model_card/layer_images/layer_6_attention_output_dense.png b/model_card/layer_images/layer_6_attention_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..eab13b453ac474fe0012467f4fc78f6717948ab5
Binary files /dev/null and b/model_card/layer_images/layer_6_attention_output_dense.png differ
diff --git a/model_card/layer_images/layer_6_attention_self_key.png b/model_card/layer_images/layer_6_attention_self_key.png
new file mode 100644
index 0000000000000000000000000000000000000000..3fc744f0b10c33dc7202bef2decaf17fa257dbff
Binary files /dev/null and b/model_card/layer_images/layer_6_attention_self_key.png differ
diff --git a/model_card/layer_images/layer_6_attention_self_query.png b/model_card/layer_images/layer_6_attention_self_query.png
new file mode 100644
index 0000000000000000000000000000000000000000..5b6c4156034298a62b4faa7f3313d40068ba54e5
Binary files /dev/null and b/model_card/layer_images/layer_6_attention_self_query.png differ
diff --git a/model_card/layer_images/layer_6_attention_self_value.png b/model_card/layer_images/layer_6_attention_self_value.png
new file mode 100644
index 0000000000000000000000000000000000000000..dd92aa0a8949c9621dd0f66d00e957b53132d3c7
Binary files /dev/null and b/model_card/layer_images/layer_6_attention_self_value.png differ
diff --git a/model_card/layer_images/layer_6_intermediate_dense.png b/model_card/layer_images/layer_6_intermediate_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..739c971eec8893f442789bae3262db641910c356
Binary files /dev/null and b/model_card/layer_images/layer_6_intermediate_dense.png differ
diff --git a/model_card/layer_images/layer_6_output_dense.png b/model_card/layer_images/layer_6_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..accf104e79589e3e30b6850b557651ed9499e749
Binary files /dev/null and b/model_card/layer_images/layer_6_output_dense.png differ
diff --git a/model_card/layer_images/layer_7_attention_output_dense.png b/model_card/layer_images/layer_7_attention_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..23978e0ce478694b9be209dabe1e8c436f80389d
Binary files /dev/null and b/model_card/layer_images/layer_7_attention_output_dense.png differ
diff --git a/model_card/layer_images/layer_7_attention_self_key.png b/model_card/layer_images/layer_7_attention_self_key.png
new file mode 100644
index 0000000000000000000000000000000000000000..ca38b023489b9f7fb93ebea926991d2f8d670510
Binary files /dev/null and b/model_card/layer_images/layer_7_attention_self_key.png differ
diff --git a/model_card/layer_images/layer_7_attention_self_query.png b/model_card/layer_images/layer_7_attention_self_query.png
new file mode 100644
index 0000000000000000000000000000000000000000..dc0d573c07d5a54944d90c335be4a4307e3df692
Binary files /dev/null and b/model_card/layer_images/layer_7_attention_self_query.png differ
diff --git a/model_card/layer_images/layer_7_attention_self_value.png b/model_card/layer_images/layer_7_attention_self_value.png
new file mode 100644
index 0000000000000000000000000000000000000000..5158a308c93c2fcc95608696cf1efe7da163ce33
Binary files /dev/null and b/model_card/layer_images/layer_7_attention_self_value.png differ
diff --git a/model_card/layer_images/layer_7_intermediate_dense.png b/model_card/layer_images/layer_7_intermediate_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..0970e6d6444a7502f6a36afca8f24e7fe68c5cae
Binary files /dev/null and b/model_card/layer_images/layer_7_intermediate_dense.png differ
diff --git a/model_card/layer_images/layer_7_output_dense.png b/model_card/layer_images/layer_7_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..8ed55aa33e9488551658545b8bb50b8d43420263
Binary files /dev/null and b/model_card/layer_images/layer_7_output_dense.png differ
diff --git a/model_card/layer_images/layer_8_attention_output_dense.png b/model_card/layer_images/layer_8_attention_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..d1b9d8a95776c62331992e91795e86cf0abceb51
Binary files /dev/null and b/model_card/layer_images/layer_8_attention_output_dense.png differ
diff --git a/model_card/layer_images/layer_8_attention_self_key.png b/model_card/layer_images/layer_8_attention_self_key.png
new file mode 100644
index 0000000000000000000000000000000000000000..4582d00b6850b295c6a4bc36c99dfae5a32de2b8
Binary files /dev/null and b/model_card/layer_images/layer_8_attention_self_key.png differ
diff --git a/model_card/layer_images/layer_8_attention_self_query.png b/model_card/layer_images/layer_8_attention_self_query.png
new file mode 100644
index 0000000000000000000000000000000000000000..a2c437788c571107b938e2954d6d0728c1338359
Binary files /dev/null and b/model_card/layer_images/layer_8_attention_self_query.png differ
diff --git a/model_card/layer_images/layer_8_attention_self_value.png b/model_card/layer_images/layer_8_attention_self_value.png
new file mode 100644
index 0000000000000000000000000000000000000000..0b3c55845f22eb1b1cb73cedb542cb81370e9407
Binary files /dev/null and b/model_card/layer_images/layer_8_attention_self_value.png differ
diff --git a/model_card/layer_images/layer_8_intermediate_dense.png b/model_card/layer_images/layer_8_intermediate_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..f63185bdbd8685fb331a117b63a172d0980d30b2
Binary files /dev/null and b/model_card/layer_images/layer_8_intermediate_dense.png differ
diff --git a/model_card/layer_images/layer_8_output_dense.png b/model_card/layer_images/layer_8_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..78bab33d84125868d848f53e15f6884f54be621a
Binary files /dev/null and b/model_card/layer_images/layer_8_output_dense.png differ
diff --git a/model_card/layer_images/layer_9_attention_output_dense.png b/model_card/layer_images/layer_9_attention_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..3488ff14566004b8e5f69bda95b95fd759a70225
Binary files /dev/null and b/model_card/layer_images/layer_9_attention_output_dense.png differ
diff --git a/model_card/layer_images/layer_9_attention_self_key.png b/model_card/layer_images/layer_9_attention_self_key.png
new file mode 100644
index 0000000000000000000000000000000000000000..2a6fbe2cdcd09c51f4b2fbea2cf42038b7a4e5d7
Binary files /dev/null and b/model_card/layer_images/layer_9_attention_self_key.png differ
diff --git a/model_card/layer_images/layer_9_attention_self_query.png b/model_card/layer_images/layer_9_attention_self_query.png
new file mode 100644
index 0000000000000000000000000000000000000000..48cd9153045b0cf6a0093aae3a3d886654bbfc77
Binary files /dev/null and b/model_card/layer_images/layer_9_attention_self_query.png differ
diff --git a/model_card/layer_images/layer_9_attention_self_value.png b/model_card/layer_images/layer_9_attention_self_value.png
new file mode 100644
index 0000000000000000000000000000000000000000..9a1ecfd5a3044904ad807d66f8634c2213cd36fc
Binary files /dev/null and b/model_card/layer_images/layer_9_attention_self_value.png differ
diff --git a/model_card/layer_images/layer_9_intermediate_dense.png b/model_card/layer_images/layer_9_intermediate_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..d2aed47501093f18f3e2797d7b34305f58737c83
Binary files /dev/null and b/model_card/layer_images/layer_9_intermediate_dense.png differ
diff --git a/model_card/layer_images/layer_9_output_dense.png b/model_card/layer_images/layer_9_output_dense.png
new file mode 100644
index 0000000000000000000000000000000000000000..f25956ec64aa33a34c5b225686439e9cdc023ff4
Binary files /dev/null and b/model_card/layer_images/layer_9_output_dense.png differ
diff --git a/model_card/pruning.svg b/model_card/pruning.svg
new file mode 100644
index 0000000000000000000000000000000000000000..dfd0e23f16827bf8be80d966f500efffdc3f0f6e
--- /dev/null
+++ b/model_card/pruning.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/model_meta.json b/model_meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..304f0a25d887d368d5d46b4ea329579ba63d53e6
--- /dev/null
+++ b/model_meta.json
@@ -0,0 +1,160 @@
+{
+ "args": {
+ "adam_epsilon": 1e-08,
+ "alpha_ce": 0.1,
+ "alpha_distil": 0.9,
+ "ampere_learning_rate": 0.01,
+ "ampere_mask_init": "constant",
+ "ampere_mask_scale": 0.0,
+ "ampere_pruning_method": "disabled",
+ "cache_dir": "",
+ "config_name": "",
+ "data_dir": "squad_data",
+ "do_eval": true,
+ "do_lower_case": true,
+ "do_train": true,
+ "doc_stride": 128,
+ "eval_all_checkpoints": true,
+ "eval_batch_size": 16,
+ "evaluate_during_training": false,
+ "final_ampere_temperature": 20,
+ "final_lambda": 200,
+ "final_shuffling_temperature": 20,
+ "final_threshold": 0.1,
+ "final_warmup": 10,
+ "fp16": false,
+ "fp16_opt_level": "O1",
+ "global_topk": false,
+ "global_topk_frequency_compute": 25,
+ "gradient_accumulation_steps": 1,
+ "in_shuffling_group": 4,
+ "initial_ampere_temperature": 0.0,
+ "initial_shuffling_temperature": 0.1,
+ "initial_threshold": 0.0,
+ "initial_warmup": 1,
+ "lang_id": 0,
+ "learning_rate": 3e-05,
+ "local_rank": -1,
+ "logging_steps": 500,
+ "mask_block_cols": 32,
+ "mask_block_rows": 32,
+ "mask_init": "constant",
+ "mask_scale": 0.0,
+ "mask_scores_learning_rate": 0.01,
+ "max_answer_length": 30,
+ "max_grad_norm": 1.0,
+ "max_query_length": 64,
+ "max_seq_length": 384,
+ "max_steps": -1,
+ "model_name_or_path": "bert-base-uncased",
+ "model_type": "masked_bert",
+ "n_best_size": 20,
+ "n_gpu": 1,
+ "no_cuda": false,
+ "null_score_diff_threshold": 0.0,
+ "num_train_epochs": 20.0,
+ "out_shuffling_group": 4,
+ "overwrite_cache": false,
+ "overwrite_output_dir": true,
+ "per_gpu_eval_batch_size": 16,
+ "per_gpu_train_batch_size": 16,
+ "predict_file": "dev-v1.1.json",
+ "pruning_method": "sigmoied_threshold",
+ "pruning_submethod": "default",
+ "regularization": "l1",
+ "save_steps": 5000,
+ "seed": 42,
+ "server_ip": "",
+ "server_port": "",
+ "shuffling_learning_rate": 0.001,
+ "shuffling_method": "disabled",
+ "teacher_name_or_path": "csarron/bert-base-uncased-squad-v1",
+ "teacher_type": "bert",
+ "temperature": 2.0,
+ "threads": 8,
+ "tokenizer_name": "",
+ "train_batch_size": 16,
+ "train_file": "train-v1.1.json",
+ "truncate_train_examples": -1,
+ "verbose_logging": false,
+ "version_2_with_negative": false,
+ "warmup_steps": 5400,
+ "weight_decay": 0.0
+ },
+ "config": {
+ "_name_or_path": "bert-base-uncased",
+ "ampere_mask_init": "constant",
+ "ampere_mask_scale": 0.0,
+ "ampere_pruning_method": "disabled",
+ "architectures": ["MaskedBertForQuestionAnswering"],
+ "attention_probs_dropout_prob": 0.1,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "in_shuffling_group": 4,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-12,
+ "mask_block_cols": 32,
+ "mask_block_rows": 32,
+ "mask_init": "constant",
+ "mask_scale": 0.0,
+ "max_position_embeddings": 512,
+ "model_type": "masked_bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "out_shuffling_group": 4,
+ "pad_token_id": 0,
+ "pruning_method": "sigmoied_threshold",
+ "pruning_submethod": "default",
+ "shuffling_method": "disabled",
+ "type_vocab_size": 2,
+ "vocab_size": 30522
+ },
+ "packaging": {
+ "model_name": "madlag/bert-base-uncased-squad1.1-block-sparse-0.07-v1",
+ "model_owner": "madlag",
+ "pytorch_final_file_size": 352215223
+ },
+ "performance": {
+ "dense": {
+ "eval_elapsed_time": 43.41997216496384
+ },
+ "pytorch_block_sparse": {
+ "eval_elapsed_time": 22.587281233048998
+ },
+ "speedup": 1.922319544214693
+ },
+ "precision": {
+ "exact": 71.8826904296875,
+ "f1": 81.3593978881836
+ },
+ "sparsity": {
+ "ampere": false,
+ "block_size": [32, 32],
+ "block_sparse": true,
+ "block_sparse_density": 0.07493007330246913,
+ "block_sparse_nnz": 6215,
+ "block_sparse_total": 82944,
+ "global_density": 0.2823549074092054,
+ "is_block_sparse_valid": true,
+ "nnz_parameters": 30913282,
+ "parameters": 109483778,
+ "pruned_heads": {
+ "0": [0, 1, 2, 4, 5, 6, 7, 9, 11],
+ "1": [0, 1, 2, 3, 5, 6, 7, 8, 9, 10],
+ "2": [1, 2, 3, 4, 5, 7, 8, 10, 11],
+ "3": [2, 3, 4, 6, 7, 9, 10, 11],
+ "4": [0, 1, 2, 4, 6, 7, 8, 9, 10, 11],
+ "5": [0, 1, 2, 4, 5, 6, 7, 11],
+ "6": [0, 1, 2, 3, 4, 5, 6, 7, 10, 11],
+ "7": [1, 2, 3, 5, 6, 7, 11],
+ "8": [0, 1, 2, 3, 4, 5, 6, 7, 8],
+ "9": [1, 3, 4, 5, 7, 9, 10, 11],
+ "10": [0, 1, 2, 4, 5, 6, 7, 8, 9],
+ "11": [0, 2, 3, 5, 7, 8, 9, 10, 11]
+ },
+ "total_attention_heads": 144,
+ "total_pruned_attention_heads": 106
+ }
+}
\ No newline at end of file