diff --git "a/vocab/gpt_neox_chinese_v1/to_v2/word_count.corpus.remove.jsonl" "b/vocab/gpt_neox_chinese_v1/to_v2/word_count.corpus.remove.jsonl" deleted file mode 100644--- "a/vocab/gpt_neox_chinese_v1/to_v2/word_count.corpus.remove.jsonl" +++ /dev/null @@ -1,24948 +0,0 @@ -{"id": 19979, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 44919, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 7421, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 36393, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 20170, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 25835, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 2763, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 27728, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 8369, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 48303, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 49051, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 44652, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 47666, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 43833, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 41727, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 39454, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 37505, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 37242, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 31617, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 37992, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 40960, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 37129, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 36732, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 36321, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120", "count": 0, "type": "remove by frequency"} -{"id": 11261, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 1085, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 28272, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 26625, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 24245, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 25878, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 25511, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 24262, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 24546, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120", "count": 0, "type": "remove by frequency"} -{"id": 5980, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 43068, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 23913, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 24356, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 22686, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 22177, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 36411, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 47464, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 21336, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 19617, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 46500, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 18411, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 39992, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 29162, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 17677, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 16727, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 48513, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 41642, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 17067, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 38531, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 16085, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 39229, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 22672, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 15533, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 14921, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 37974, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 48523, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 33634, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 14686, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 33640, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 13075, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 17240, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 13582, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 13475, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 29942, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 13873, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 27323, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 25927, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 13113, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 14222, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 13095, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 29424, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 37570, "token": "\u01200000000000000000000000000000000000", "merges": "\u012000000000000000000000000000000000 00", "count": 0, "type": "remove by frequency"} -{"id": 12375, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 8533, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 12703, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 26213, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120", "count": 0, "type": "remove by frequency"} -{"id": 3263, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 12849, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120", "count": 0, "type": "remove by frequency"} -{"id": 47729, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 542, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 41606, "token": "\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124", "merges": "\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124 \u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124", "count": 0, "type": "remove by frequency"} -{"id": 9740, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 10033, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 22597, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 10204, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 18774, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 42897, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 9531, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 20102, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 6596, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 9219, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 8044, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 20380, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 16310, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 7828, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 46336, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 18118, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 7643, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120", "count": 0, "type": "remove by frequency"} -{"id": 33528, "token": "\u010d\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010d \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 30462, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 1636, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 4119, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 5640, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 17482, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 5808, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 12826, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 48458, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 5144, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 19738, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 14243, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 21981, "token": "\u010d\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010d \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 5121, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 2580, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 4333, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 50133, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 15518, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 4643, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 8940, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120", "count": 0, "type": "remove by frequency"} -{"id": 42010, "token": "\u0120BytePtrFromString", "merges": "\u0120Byte PtrFromString", "count": 0, "type": "remove by frequency"} -{"id": 4356, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120", "count": 0, "type": "remove by frequency"} -{"id": 1188, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 13286, "token": "\u010d\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010d \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 24675, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 11475, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 3990, "token": "\u0120\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120 \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 349, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 26868, "token": "\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124", "merges": "\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124 \u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124", "count": 0, "type": "remove by frequency"} -{"id": 1015, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 39922, "token": "1451450014514500", "merges": "14514500 14514500", "count": 0, "type": "remove by frequency"} -{"id": 25766, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u010a\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 47034, "token": "\u010a\u0120\u010a\u0120\u010a\u0120\u010a\u0120\u010a\u0120\u010a\u0120\u010a\u0120\u010a\u0120", "merges": "\u010a\u0120\u010a\u0120\u010a\u0120\u010a\u0120 \u010a\u0120\u010a\u0120\u010a\u0120\u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 30871, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 2857, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 9660, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 31072, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 38827, "token": "\u010d\u010a\u010d\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010d\u010a\u010d \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 38411, "token": "\u010d\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010d \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 3380, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 4945, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 44782, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u010a\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 32972, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 3203, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 7833, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 41557, "token": "PtrFromString", "merges": "Ptr FromString", "count": 0, "type": "remove by frequency"} -{"id": 21024, "token": "\u0120\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120 \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 6066, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 8173, "token": "\u010d\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010d \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 23893, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u010a\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 3059, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 935, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 34229, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u010a\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 36209, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 1932, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 37406, "token": "medscimonit", "merges": "medsc imonit", "count": 0, "type": "remove by frequency"} -{"id": 30371, "token": "marinedrugs", "merges": "marined rugs", "count": 0, "type": "remove by frequency"} -{"id": 46209, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u010a\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 17091, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 48581, "token": "\u0120\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120 \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 45659, "token": "\u0120Spacewatch", "merges": "\u0120Space watch", "count": 0, "type": "remove by frequency"} -{"id": 41499, "token": "\u010d\u010d\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010d\u010d \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 33463, "token": "\u0120Parlamento", "merges": "\u0120Parl amento", "count": 0, "type": "remove by frequency"} -{"id": 7633, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 23883, "token": "\u010d\u010a\u010d\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010d\u010a\u010d \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 18761, "token": "\u010d\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010d \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 2674, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 2463, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120", "count": 0, "type": "remove by frequency"} -{"id": 49176, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u010a\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 21120, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 38568, "token": "\u0120Appellees", "merges": "\u0120App ellees", "count": 0, "type": "remove by frequency"} -{"id": 47827, "token": "mathchoice", "merges": "math choice", "count": 0, "type": "remove by frequency"} -{"id": 47147, "token": "\u010a\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 47231, "token": "\u0120\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120 \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 33235, "token": "\u010d\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010d \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 2286, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u0120", "count": 0, "type": "remove by frequency"} -{"id": 576, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 45536, "token": "\u0120Euroopan", "merges": "\u0120Euro opan", "count": 0, "type": "remove by frequency"} -{"id": 2707, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 28912, "token": "^\u00e2\u012a\u0134/\u00e2\u012a\u0134^", "merges": "^\u00e2\u012a\u0134/\u00e2\u012a\u0134 ^", "count": 0, "type": "remove by frequency"} -{"id": 45437, "token": "\u0120AFFIRMED", "merges": "\u0120A FFIRMED", "count": 0, "type": "remove by frequency"} -{"id": 15398, "token": "\u010a\u0120\u0120\u0120\u0120\u010a\u0120\u0120\u0120", "merges": "\u010a\u0120\u0120\u0120\u0120 \u010a\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 43566, "token": "\u010a\u0120\u0120\u00c2\u0142\u00c2\u0142\u00c2\u0142", "merges": "\u010a\u0120\u0120 \u00c2\u0142\u00c2\u0142\u00c2\u0142", "count": 0, "type": "remove by frequency"} -{"id": 13300, "token": "\u0120\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120 \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 37293, "token": "\u010a\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 5452, "token": "\u010d\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010d \u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 22702, "token": "\u0120errnoErr", "merges": "\u0120errno Err", "count": 0, "type": "remove by frequency"} -{"id": 47245, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u010a\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120 \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 286, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120 \u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 648, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a \u0120\u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 39743, "token": "14514500", "merges": "14514 500", "count": 0, "type": "remove by frequency"} -{"id": 25395, "token": "\u010a\u0120\u010a\u0120\u010a\u0120\u010a\u0120", "merges": "\u010a\u0120\u010a\u0120 \u010a\u0120\u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 17030, "token": "\u00c3\u0125\u00c3\u0124\u00c3\u0125\u00c3\u0124", "merges": "\u00c3\u0125\u00c3\u0124 \u00c3\u0125\u00c3\u0124", "count": 0, "type": "remove by frequency"} -{"id": 23286, "token": "^\u00e2\u012a\u0134/\u00e2\u012a\u0134", "merges": "^\u00e2\u012a\u0134 /\u00e2\u012a\u0134", "count": 0, "type": "remove by frequency"} -{"id": 48018, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u010a\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120\u0120 \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 25330, "token": "\u010a\u010a\u010a\u010a\u010a\u010a\u010a\u010a", "merges": "\u010a\u010a\u010a\u010a \u010a\u010a\u010a\u010a", "count": 0, "type": "remove by frequency"} -{"id": 45936, "token": "\u010a\u010a\u010a\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u010a\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 6530, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 35088, "token": "...\\...\\", "merges": "...\\ ...\\", "count": 0, "type": "remove by frequency"} -{"id": 46045, "token": "brainsci", "merges": "brains ci", "count": 0, "type": "remove by frequency"} -{"id": 37682, "token": "\u010a\u010c\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010c \u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 41689, "token": "\u010d\u010a\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010d\u010a \u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 44863, "token": "\u0120\u010a\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u010a \u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 467, "token": "\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120 \u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 43226, "token": "\u00e2\u012a\u0134/\u00e2\u012a\u0134", "merges": "\u00e2\u012a\u0134 /\u00e2\u012a\u0134", "count": 0, "type": "remove by frequency"} -{"id": 42820, "token": "\u010a\u0109\u0109\u0109\u0120\u0120\u0120", "merges": "\u010a\u0109\u0109\u0109 \u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 17293, "token": "\u010d\u010a\u010d\u010a\u0120\u0120\u0120", "merges": "\u010d\u010a \u010d\u010a\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 39472, "token": "\u010a\u0109\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u0109 \u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 8431, "token": "\u010d\u010a\u0120\u0120\u0120\u0120\u0120", "merges": "\u010d \u010a\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 3893, "token": "\u010a\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a \u0120\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 22937, "token": "\u0120\u010a\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120 \u010a\u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 35451, "token": "\u010a\u010a\u010a\u010a\u010a\u010a\u010a", "merges": "\u010a\u010a\u010a\u010a \u010a\u010a\u010a", "count": 0, "type": "remove by frequency"} -{"id": 43993, "token": "\u0120\u0120\u0120\u0120\u0120\u010a\u0120", "merges": "\u0120\u0120\u0120\u0120\u0120 \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 23734, "token": "\u0120[****,", "merges": "\u0120[ ****,", "count": 0, "type": "remove by frequency"} -{"id": 42595, "token": "\u010d\u010a\u0109\u0109\u0109\u0109\u0109", "merges": "\u010d \u010a\u0109\u0109\u0109\u0109\u0109", "count": 0, "type": "remove by frequency"} -{"id": 33835, "token": "doibase", "merges": "do ibase", "count": 0, "type": "remove by frequency"} -{"id": 5814, "token": "\u010a\u010a\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 979, "token": "\u0120\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120 \u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 1043, "token": "\u010a\u0120\u0120\u0120\u0120\u0120", "merges": "\u010a \u0120\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 48471, "token": ":**]{}", "merges": ":** ]{}", "count": 0, "type": "remove by frequency"} -{"id": 43134, "token": "\u0120()]{}", "merges": "\u0120() ]{}", "count": 0, "type": "remove by frequency"} -{"id": 25416, "token": "[\\*](#", "merges": "[ \\*](#", "count": 0, "type": "remove by frequency"} -{"id": 36487, "token": ".**]{}", "merges": ".** ]{}", "count": 0, "type": "remove by frequency"} -{"id": 46924, "token": "obbsee", "merges": "obb see", "count": 0, "type": "remove by frequency"} -{"id": 39900, "token": "\u010a\u010a\u0109\u0109\u0109\u0109", "merges": "\u010a\u010a \u0109\u0109\u0109\u0109", "count": 0, "type": "remove by frequency"} -{"id": 41305, "token": "]{}\\_[", "merges": "]{}\\ _[", "count": 0, "type": "remove by frequency"} -{"id": 11763, "token": "ijerph", "merges": "ijer ph", "count": 0, "type": "remove by frequency"} -{"id": 26046, "token": "\u010a\u010a\u010a\u0120\u0120\u0120", "merges": "\u010a\u010a \u010a\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 46468, "token": "\\])]{}", "merges": "\\]) ]{}", "count": 0, "type": "remove by frequency"} -{"id": 34761, "token": "\u0120()](\\", "merges": "\u0120() ](\\", "count": 0, "type": "remove by frequency"} -{"id": 28655, "token": "\u010a\u0109\u0109\u0120\u0120\u0120", "merges": "\u010a\u0109\u0109 \u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 6257, "token": "**]{},", "merges": "** ]{},", "count": 0, "type": "remove by frequency"} -{"id": 46597, "token": "\u0120${{{\\", "merges": "\u0120${ {{\\", "count": 0, "type": "remove by frequency"} -{"id": 10072, "token": "\u010a\u010a\u0120\u0120\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 27401, "token": "\u0120\u010a\u0120\u0120\u0120\u0120", "merges": "\u0120 \u010a\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 29223, "token": "\u010a\u0120\u0120\u00e2\u0122\u0125", "merges": "\u010a\u0120\u0120 \u00e2\u0122\u0125", "count": 0, "type": "remove by frequency"} -{"id": 38710, "token": "\u0120\\*\\**", "merges": "\u0120\\* \\**", "count": 0, "type": "remove by frequency"} -{"id": 44162, "token": "]{}\\^[", "merges": "]{}\\ ^[", "count": 0, "type": "remove by frequency"} -{"id": 43782, "token": ".^\\[[@", "merges": ". ^\\[[@", "count": 0, "type": "remove by frequency"} -{"id": 27720, "token": ".*]{},", "merges": ".* ]{},", "count": 0, "type": "remove by frequency"} -{"id": 28778, "token": "\u010d\u010a\u0109\u0109\u0109\u0109", "merges": "\u010d \u010a\u0109\u0109\u0109\u0109", "count": 0, "type": "remove by frequency"} -{"id": 47056, "token": "}}}({\\", "merges": "}} }({\\", "count": 0, "type": "remove by frequency"} -{"id": 9899, "token": "]{}]{}", "merges": "]{} ]{}", "count": 0, "type": "remove by frequency"} -{"id": 31988, "token": "\u0120\u0120\u0120\u0120\u010a\u0120", "merges": "\u0120\u0120\u0120\u0120 \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 17723, "token": "\u010d\u010a\u0120\u0120\u0120\u0120", "merges": "\u010d \u010a\u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 45099, "token": "}}({{\\", "merges": "}}( {{\\", "count": 0, "type": "remove by frequency"} -{"id": 35950, "token": "\u0120\u0120\u010a\u0120\u0120\u0120", "merges": "\u0120\u0120 \u010a\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 39310, "token": "\"}**).", "merges": "\"} **).", "count": 0, "type": "remove by frequency"} -{"id": 674, "token": "\u0120\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120\u0120\u0120 \u0120", "count": 0, "type": "remove by frequency"} -{"id": 1760, "token": "\u010a\u0120\u0120\u0120\u0120", "merges": "\u010a \u0120\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 3568, "token": "\u010d\u010a\u0120\u0120\u0120", "merges": "\u010d \u010a\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 18022, "token": "\\*](#", "merges": "\\* ](#", "count": 0, "type": "remove by frequency"} -{"id": 31096, "token": "^\\[[@", "merges": "^ \\[[@", "count": 0, "type": "remove by frequency"} -{"id": 29683, "token": "\u010a\u0120\u0120\u00c2\u0142", "merges": "\u010a\u0120\u0120 \u00c2\u0142", "count": 0, "type": "remove by frequency"} -{"id": 22180, "token": "_{{{\\", "merges": "_{ {{\\", "count": 0, "type": "remove by frequency"} -{"id": 17158, "token": "\u010d\u010a\u0109\u0109\u0109", "merges": "\u010d \u010a\u0109\u0109\u0109", "count": 0, "type": "remove by frequency"} -{"id": 39083, "token": "\"}](#", "merges": "\"} ](#", "count": 0, "type": "remove by frequency"} -{"id": 47312, "token": "iNdEx", "merges": "i NdEx", "count": 0, "type": "remove by frequency"} -{"id": 20977, "token": "}$]{}", "merges": "}$ ]{}", "count": 0, "type": "remove by frequency"} -{"id": 7444, "token": "**]{}", "merges": "** ]{}", "count": 0, "type": "remove by frequency"} -{"id": 1254, "token": "\u0120\\[[@", "merges": "\u0120\\[ [@", "count": 0, "type": "remove by frequency"} -{"id": 35379, "token": ")}^{(", "merges": ") }^{(", "count": 0, "type": "remove by frequency"} -{"id": 26061, "token": "\u010d\u010a\u010d\u010a\u0120", "merges": "\u010d\u010a \u010d\u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 8129, "token": "\u0120\u010a\u0120\u0120\u0120", "merges": "\u0120 \u010a\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 24994, "token": "^\u00e2\u012a\u0134^", "merges": "^\u00e2\u012a\u0134 ^", "count": 0, "type": "remove by frequency"} -{"id": 26119, "token": "*]{}.", "merges": "*]{} .", "count": 0, "type": "remove by frequency"} -{"id": 27655, "token": "\u0120$[]$", "merges": "\u0120$ []$", "count": 0, "type": "remove by frequency"} -{"id": 8863, "token": "\u010a\u010a\u010a\u010a\u010a", "merges": "\u010a\u010a \u010a\u010a\u010a", "count": 0, "type": "remove by frequency"} -{"id": 16207, "token": "\u0120$${\\", "merges": "\u0120$$ {\\", "count": 0, "type": "remove by frequency"} -{"id": 34833, "token": "\u0120xcex", "merges": "\u0120x cex", "count": 0, "type": "remove by frequency"} -{"id": 38320, "token": "\u0120\u00c2\u00b6\u00c2\u00b6", "merges": "\u0120\u00c2\u00b6 \u00c2\u00b6", "count": 0, "type": "remove by frequency"} -{"id": 45488, "token": "\u0120([**", "merges": "\u0120([ **", "count": 0, "type": "remove by frequency"} -{"id": 1496, "token": "\u010a\u010a\u0120\u0120\u0120", "merges": "\u010a \u010a\u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 49021, "token": "$.[]{", "merges": "$. []{", "count": 0, "type": "remove by frequency"} -{"id": 33158, "token": "\u010a\u0109\u0109\u010a\u0109", "merges": "\u010a\u0109\u0109 \u010a\u0109", "count": 0, "type": "remove by frequency"} -{"id": 28122, "token": "\u010a\u010a\u0109\u0109\u0109", "merges": "\u010a\u010a\u0109\u0109 \u0109", "count": 0, "type": "remove by frequency"} -{"id": 29013, "token": "\\]]{}", "merges": "\\] ]{}", "count": 0, "type": "remove by frequency"} -{"id": 32964, "token": "}}.$$", "merges": "}} .$$", "count": 0, "type": "remove by frequency"} -{"id": 7201, "token": "\u0120${{\\", "merges": "\u0120$ {{\\", "count": 0, "type": "remove by frequency"} -{"id": 30324, "token": "\u010a\u0120\u0120\u010a\u0120", "merges": "\u010a\u0120\u0120 \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 13663, "token": ".*]{}", "merges": ". *]{}", "count": 0, "type": "remove by frequency"} -{"id": 31350, "token": "$]{};", "merges": "$]{} ;", "count": 0, "type": "remove by frequency"} -{"id": 35745, "token": "\u010d\u010a\u010d\u010a\u010d", "merges": "\u010d\u010a \u010d\u010a\u010d", "count": 0, "type": "remove by frequency"} -{"id": 45382, "token": "]{}\\_", "merges": "]{}\\ _", "count": 0, "type": "remove by frequency"} -{"id": 15775, "token": ".\\[[@", "merges": ". \\[[@", "count": 0, "type": "remove by frequency"} -{"id": 7254, "token": "]--[@", "merges": "]-- [@", "count": 0, "type": "remove by frequency"} -{"id": 39049, "token": "}})$.", "merges": "}} )$.", "count": 0, "type": "remove by frequency"} -{"id": 42041, "token": "\u0120\u00c2\u00a7\\[", "merges": "\u0120\u00c2\u00a7 \\[", "count": 0, "type": "remove by frequency"} -{"id": 42348, "token": ")}.$$", "merges": ")} .$$", "count": 0, "type": "remove by frequency"} -{"id": 14352, "token": "\u010a\u0109\u0120\u0120\u0120", "merges": "\u010a\u0109 \u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 49824, "token": "\u0120[(\\[", "merges": "\u0120[ (\\[", "count": 0, "type": "remove by frequency"} -{"id": 11661, "token": "*]{},", "merges": "* ]{},", "count": 0, "type": "remove by frequency"} -{"id": 20426, "token": "\u0120\u0120\u0120\u010a\u0120", "merges": "\u0120\u0120\u0120 \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 33805, "token": "]{}\\^", "merges": "]{}\\ ^", "count": 0, "type": "remove by frequency"} -{"id": 36134, "token": "[\\*\\*", "merges": "[ \\*\\*", "count": 0, "type": "remove by frequency"} -{"id": 38475, "token": "\"}\\].", "merges": "\"} \\].", "count": 0, "type": "remove by frequency"} -{"id": 39015, "token": "\u010d\u010a\u010d\u010a\u0109", "merges": "\u010d\u010a\u010d \u010a\u0109", "count": 0, "type": "remove by frequency"} -{"id": 39077, "token": "\\}.$$", "merges": "\\ }.$$", "count": 0, "type": "remove by frequency"} -{"id": 252, "token": "\u0120\u0120\u0120\u0120", "merges": "\u0120\u0120 \u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 477, "token": "\u010a\u0120\u0120\u0120", "merges": "\u010a \u0120\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 5429, "token": "\u010a\u010a\u010a\u010a", "merges": "\u010a\u010a \u010a\u010a", "count": 0, "type": "remove by frequency"} -{"id": 12593, "token": "\u010a\u0120\u010a\u0120", "merges": "\u010a\u0120 \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 11866, "token": "\u010a\u010a\u0109\u0109", "merges": "\u010a\u010a \u0109\u0109", "count": 0, "type": "remove by frequency"} -{"id": 13488, "token": "$]{}", "merges": "$ ]{}", "count": 0, "type": "remove by frequency"} -{"id": 17981, "token": ").](", "merges": "). ](", "count": 0, "type": "remove by frequency"} -{"id": 44578, "token": "\u0120\u00e2\u0122\u012b", "merges": "\u0120\u00e2\u0122 \u012b", "count": 0, "type": "remove by frequency"} -{"id": 32776, "token": "\u0120\u0120\u010a\u0120", "merges": "\u0120\u0120 \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 34607, "token": "\u0120^[@", "merges": "\u0120 ^[@", "count": 0, "type": "remove by frequency"} -{"id": 49411, "token": "*\u00e2\u0122\u0132", "merges": "* \u00e2\u0122\u0132", "count": 0, "type": "remove by frequency"} -{"id": 21898, "token": "\u0120\u010a\u0120\u0120", "merges": "\u0120 \u010a\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 15971, "token": "\u010d\u010a\u010d\u010a", "merges": "\u010d\u010a \u010d\u010a", "count": 0, "type": "remove by frequency"} -{"id": 33653, "token": "^\\*^", "merges": "^\\ *^", "count": 0, "type": "remove by frequency"} -{"id": 30996, "token": ",^[@", "merges": ", ^[@", "count": 0, "type": "remove by frequency"} -{"id": 17278, "token": "^](#", "merges": "^ ](#", "count": 0, "type": "remove by frequency"} -{"id": 31522, "token": "\u0120\u00e2\u0123\u00a2", "merges": "\u0120\u00e2\u0123 \u00a2", "count": 0, "type": "remove by frequency"} -{"id": 30775, "token": "$\u00e2\u0122\u0135", "merges": "$ \u00e2\u0122\u0135", "count": 0, "type": "remove by frequency"} -{"id": 45581, "token": "dAtA", "merges": "d AtA", "count": 0, "type": "remove by frequency"} -{"id": 16413, "token": "\u010d\u010a\u0120\u0120", "merges": "\u010d \u010a\u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 28375, "token": "\u0120\u00ce\u00bcL", "merges": "\u0120\u00ce\u00bc L", "count": 0, "type": "remove by frequency"} -{"id": 10174, "token": ".^[@", "merges": ". ^[@", "count": 0, "type": "remove by frequency"} -{"id": 29547, "token": "]\\];", "merges": "]\\] ;", "count": 0, "type": "remove by frequency"} -{"id": 35768, "token": ",{{\\", "merges": ", {{\\", "count": 0, "type": "remove by frequency"} -{"id": 46230, "token": "\u00c2\u012a\u00c2\u0133", "merges": "\u00c2\u012a \u00c2\u0133", "count": 0, "type": "remove by frequency"} -{"id": 47279, "token": ":$$\\", "merges": ": $$\\", "count": 0, "type": "remove by frequency"} -{"id": 39945, "token": "~}{~", "merges": "~ }{~", "count": 0, "type": "remove by frequency"} -{"id": 18031, "token": ":\"){", "merges": ":\" ){", "count": 0, "type": "remove by frequency"} -{"id": 33525, "token": "\u010a\u010a\u010a\u0120", "merges": "\u010a\u010a \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 44898, "token": "\u010a\u0109\u0120\u0120", "merges": "\u010a\u0109 \u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 47686, "token": "\\^[-", "merges": "\\^ [-", "count": 0, "type": "remove by frequency"} -{"id": 5413, "token": "]\\],", "merges": "]\\] ,", "count": 0, "type": "remove by frequency"} -{"id": 26362, "token": "\u0120{\u00c2\u00b6", "merges": "\u0120{ \u00c2\u00b6", "count": 0, "type": "remove by frequency"} -{"id": 46603, "token": "\u010a\u010a\u0120\u010a", "merges": "\u010a\u010a\u0120 \u010a", "count": 0, "type": "remove by frequency"} -{"id": 32214, "token": "]\\]^", "merges": "]\\] ^", "count": 0, "type": "remove by frequency"} -{"id": 27468, "token": "\u0120\\_[", "merges": "\u0120\\ _[", "count": 0, "type": "remove by frequency"} -{"id": 11459, "token": "\u010d\u010a\u0109\u0109", "merges": "\u010d \u010a\u0109\u0109", "count": 0, "type": "remove by frequency"} -{"id": 42804, "token": ")^[@", "merges": ") ^[@", "count": 0, "type": "remove by frequency"} -{"id": 8926, "token": "\u010a\u010a\u0120\u0120", "merges": "\u010a\u010a \u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 27114, "token": ".(\\[", "merges": ". (\\[", "count": 0, "type": "remove by frequency"} -{"id": 31743, "token": "^\u00c2\u00ae^", "merges": "^\u00c2\u00ae ^", "count": 0, "type": "remove by frequency"} -{"id": 34985, "token": "\u00c2\u0142\u010a\u0120", "merges": "\u00c2\u0142 \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 30119, "token": "\u0120(\\>", "merges": "\u0120(\\ >", "count": 0, "type": "remove by frequency"} -{"id": 42419, "token": "\u0120(\\~", "merges": "\u0120(\\ ~", "count": 0, "type": "remove by frequency"} -{"id": 38679, "token": "\u0120\u0120\u0120\u010a", "merges": "\u0120\u0120\u0120 \u010a", "count": 0, "type": "remove by frequency"} -{"id": 341, "token": "\u0120\u0120\u0120", "merges": "\u0120\u0120 \u0120", "count": 0, "type": "remove by frequency"} -{"id": 4888, "token": "\u010d\u010a\u010d", "merges": "\u010d\u010a \u010d", "count": 0, "type": "remove by frequency"} -{"id": 1772, "token": "\u010a\u0120\u0120", "merges": "\u010a \u0120\u0120", "count": 0, "type": "remove by frequency"} -{"id": 4256, "token": "\u010d\u010a\u0120", "merges": "\u010d \u010a\u0120", "count": 0, "type": "remove by frequency"} -{"id": 1744, "token": "\u010a\u010a\u0120", "merges": "\u010a\u010a \u0120", "count": 0, "type": "remove by frequency"} -{"id": 6886, "token": "\u010a\u010a\u0109", "merges": "\u010a\u010a \u0109", "count": 0, "type": "remove by frequency"} -{"id": 53775, "token": "\u00e8\u00ab\u00a1", "merges": "\u00e8\u00ab \u00a1", "count": 0, "type": "remove by frequency"} -{"id": 7951, "token": "\u010d\u010a\u0109", "merges": "\u010d \u010a\u0109", "count": 0, "type": "remove by frequency"} -{"id": 13769, "token": "\u0120\u0120\u010a", "merges": "\u0120\u0120 \u010a", "count": 0, "type": "remove by frequency"} -{"id": 22417, "token": "\u010a\u0109\u010a", "merges": "\u010a\u0109 \u010a", "count": 0, "type": "remove by frequency"} -{"id": 245, "token": "\u0120\u0120", "merges": "\u0120 \u0120", "count": 0, "type": "remove by frequency"} -{"id": 16981, "token": "\u010d\u010d", "merges": "\u010d \u010d", "count": 0, "type": "remove by frequency"} -{"id": 8675, "token": "\u010a\u010c", "merges": "\u010a \u010c", "count": 0, "type": "remove by frequency"} -{"id": 2379, "token": "\u010d\u010a", "merges": "\u010d \u010a", "count": 0, "type": "remove by frequency"} -{"id": 38572, "token": "\\\u0103", "merges": "\\ \u0103", "count": 0, "type": "remove by frequency"} -{"id": 20434, "token": "\u0120\u010d", "merges": "\u0120 \u010d", "count": 0, "type": "remove by frequency"} -{"id": 33059, "token": "\u0111\u0103", "merges": "\u0111 \u0103", "count": 0, "type": "remove by frequency"} -{"id": 27886, "token": "scriptscriptstyle", "merges": "script scriptstyle", "count": 1, "type": "remove by frequency"} -{"id": 43421, "token": "Sportspeople", "merges": "Sports people", "count": 1, "type": "remove by frequency"} -{"id": 27528, "token": "\u0120Appellants", "merges": "\u0120App ellants", "count": 1, "type": "remove by frequency"} -{"id": 18160, "token": "\u0120Petitioner", "merges": "\u0120Petition er", "count": 1, "type": "remove by frequency"} -{"id": 46558, "token": "Appellees", "merges": "App ellees", "count": 1, "type": "remove by frequency"} -{"id": 39834, "token": "\u0120Europese", "merges": "\u0120Europ ese", "count": 1, "type": "remove by frequency"} -{"id": 8860, "token": "wasysym", "merges": "was ysym", "count": 1, "type": "remove by frequency"} -{"id": 37276, "token": "}}}_{\\", "merges": "}} }_{\\", "count": 1, "type": "remove by frequency"} -{"id": 47996, "token": "\\!\\!\\!", "merges": "\\!\\! \\!", "count": 1, "type": "remove by frequency"} -{"id": 45251, "token": "}})$,", "merges": "}} )$,", "count": 1, "type": "remove by frequency"} -{"id": 19753, "token": "}}({\\", "merges": "}}( {\\", "count": 1, "type": "remove by frequency"} -{"id": 43214, "token": "}).$$", "merges": "} ).$$", "count": 1, "type": "remove by frequency"} -{"id": 30780, "token": "\u0120$({\\", "merges": "\u0120$( {\\", "count": 1, "type": "remove by frequency"} -{"id": 35539, "token": "NdEx", "merges": "Nd Ex", "count": 1, "type": "remove by frequency"} -{"id": 16079, "token": ".[]{", "merges": ".[ ]{", "count": 1, "type": "remove by frequency"} -{"id": 28381, "token": "\u0120A\u00ce\u00b2", "merges": "\u0120A \u00ce\u00b2", "count": 1, "type": "remove by frequency"} -{"id": 10148, "token": "\"}](", "merges": "\"} ](", "count": 1, "type": "remove by frequency"} -{"id": 17548, "token": "]-[@", "merges": "]- [@", "count": 1, "type": "remove by frequency"} -{"id": 33939, "token": "\u00c2\u0122\u00c2\u0124", "merges": "\u00c2\u0122 \u00c2\u0124", "count": 1, "type": "remove by frequency"} -{"id": 38913, "token": "\u00c2\u013a\u00c2\u00af", "merges": "\u00c2\u013a \u00c2\u00af", "count": 1, "type": "remove by frequency"} -{"id": 24237, "token": ".]{}", "merges": ". ]{}", "count": 1, "type": "remove by frequency"} -{"id": 47446, "token": "*](#", "merges": "* ](#", "count": 1, "type": "remove by frequency"} -{"id": 2756, "token": "\u010a\u010a\u010a", "merges": "\u010a\u010a \u010a", "count": 1, "type": "remove by frequency"} -{"id": 27596, "token": "^\u00c2\u00ae", "merges": "^ \u00c2\u00ae", "count": 1, "type": "remove by frequency"} -{"id": 19668, "token": "\u010a\u0120\u010a", "merges": "\u010a\u0120 \u010a", "count": 1, "type": "remove by frequency"} -{"id": 46224, "token": "\u0120immunoreactivity", "merges": "\u0120immunore activity", "count": 2, "type": "remove by frequency"} -{"id": 41321, "token": "\u0120supernatants", "merges": "\u0120supernat ants", "count": 2, "type": "remove by frequency"} -{"id": 49597, "token": "\u0120Petitioners", "merges": "\u0120Petition ers", "count": 2, "type": "remove by frequency"} -{"id": 12601, "token": "\u0120Plaintiffs", "merges": "\u0120Plaintiff s", "count": 2, "type": "remove by frequency"} -{"id": 34443, "token": "\u0120appellees", "merges": "\u0120app ellees", "count": 2, "type": "remove by frequency"} -{"id": 34319, "token": "\u0120CURIAM", "merges": "\u0120CURI AM", "count": 2, "type": "remove by frequency"} -{"id": 32334, "token": "ubottu", "merges": "ub ottu", "count": 2, "type": "remove by frequency"} -{"id": 35799, "token": "}}}$,", "merges": "}} }$,", "count": 2, "type": "remove by frequency"} -{"id": 38451, "token": "\u0120da\u00c3\u0141", "merges": "\u0120da \u00c3\u0141", "count": 2, "type": "remove by frequency"} -{"id": 33438, "token": "biggl", "merges": "big gl", "count": 2, "type": "remove by frequency"} -{"id": 47028, "token": ")\\|_{", "merges": ")\\ |_{", "count": 2, "type": "remove by frequency"} -{"id": 42465, "token": "}},$$", "merges": "}}, $$", "count": 2, "type": "remove by frequency"} -{"id": 37786, "token": "**](#", "merges": "** ](#", "count": 2, "type": "remove by frequency"} -{"id": 34494, "token": "]\\]).", "merges": "]\\] ).", "count": 2, "type": "remove by frequency"} -{"id": 33471, "token": "}}}$.", "merges": "}} }$.", "count": 2, "type": "remove by frequency"} -{"id": 44524, "token": "].$$", "merges": "]. $$", "count": 2, "type": "remove by frequency"} -{"id": 42213, "token": "\u0120\u00c2\u00b5l", "merges": "\u0120\u00c2\u00b5 l", "count": 2, "type": "remove by frequency"} -{"id": 21687, "token": ")]{}", "merges": ") ]{}", "count": 2, "type": "remove by frequency"} -{"id": 15267, "token": "\u0120\u010a\u010a", "merges": "\u0120 \u010a\u010a", "count": 2, "type": "remove by frequency"} -{"id": 40521, "token": "\u0120\u00c2\u0135", "merges": "\u0120\u00c2 \u0135", "count": 2, "type": "remove by frequency"} -{"id": 30843, "token": "\u010f\u0103", "merges": "\u010f \u0103", "count": 2, "type": "remove by frequency"} -{"id": 9517, "token": "\u0120Appellant", "merges": "\u0120App ellant", "count": 3, "type": "remove by frequency"} -{"id": 24730, "token": "\u0120APPEALS", "merges": "\u0120APPE ALS", "count": 3, "type": "remove by frequency"} -{"id": 37857, "token": "\\!\\!\\!\\!", "merges": "\\!\\! \\!\\!", "count": 3, "type": "remove by frequency"} -{"id": 36847, "token": "mathbbm", "merges": "mathbb m", "count": 3, "type": "remove by frequency"} -{"id": 8844, "token": "amsbsy", "merges": "ams bsy", "count": 3, "type": "remove by frequency"} -{"id": 47183, "token": "\u0120$\\|\\", "merges": "\u0120$\\ |\\", "count": 3, "type": "remove by frequency"} -{"id": 35420, "token": "}({{\\", "merges": "}( {{\\", "count": 3, "type": "remove by frequency"} -{"id": 15304, "token": "/\u00e2\u012a\u0134", "merges": "/ \u00e2\u012a\u0134", "count": 3, "type": "remove by frequency"} -{"id": 8088, "token": "\\[[@", "merges": "\\[ [@", "count": 3, "type": "remove by frequency"} -{"id": 28554, "token": "}}$-", "merges": "}} $-", "count": 3, "type": "remove by frequency"} -{"id": 30707, "token": "\u00c2\u013c\u00c2\u0126", "merges": "\u00c2\u013c \u00c2\u0126", "count": 3, "type": "remove by frequency"} -{"id": 18822, "token": "\u0120.$$", "merges": "\u0120. $$", "count": 3, "type": "remove by frequency"} -{"id": 26786, "token": "\u0120(\\<", "merges": "\u0120( \\<", "count": 3, "type": "remove by frequency"} -{"id": 52864, "token": "\u00e7\u0136\u0137", "merges": "\u00e7\u0136 \u0137", "count": 3, "type": "remove by frequency"} -{"id": 44847, "token": "\u0120Commissie", "merges": "\u0120Commiss ie", "count": 4, "type": "remove by frequency"} -{"id": 22706, "token": "\u0120Appellee", "merges": "\u0120App ellee", "count": 4, "type": "remove by frequency"} -{"id": 40968, "token": "\u0120Israelis", "merges": "\u0120Israel is", "count": 4, "type": "remove by frequency"} -{"id": 20089, "token": "lesssim", "merges": "less sim", "count": 4, "type": "remove by frequency"} -{"id": 38443, "token": "\u00e2\u012a\u0139\u00e2\u012a\u0139", "merges": "\u00e2\u012a\u0139 \u00e2\u012a\u0139", "count": 4, "type": "remove by frequency"} -{"id": 41610, "token": "\u0120GAPDH", "merges": "\u0120G APDH", "count": 4, "type": "remove by frequency"} -{"id": 41908, "token": "gtrsim", "merges": "gtr sim", "count": 4, "type": "remove by frequency"} -{"id": 39111, "token": "}}}(\\", "merges": "}} }(\\", "count": 4, "type": "remove by frequency"} -{"id": 40743, "token": "ubotu", "merges": "ubot u", "count": 4, "type": "remove by frequency"} -{"id": 18781, "token": "\u0120\u00e2\u013b\u00aa\"", "merges": "\u0120\u00e2\u013b\u00aa \"", "count": 4, "type": "remove by frequency"} -{"id": 45881, "token": "}=-\\", "merges": "}= -\\", "count": 4, "type": "remove by frequency"} -{"id": 24759, "token": "}}}^", "merges": "}} }^", "count": 4, "type": "remove by frequency"} -{"id": 19219, "token": "\u0120$^{", "merges": "\u0120$ ^{", "count": 4, "type": "remove by frequency"} -{"id": 52368, "token": "\u00e6\u00a8\u00ab", "merges": "\u00e6\u00a8 \u00ab", "count": 4, "type": "remove by frequency"} -{"id": 33307, "token": "\u00c2\u013d", "merges": "\u00c2 \u013d", "count": 4, "type": "remove by frequency"} -{"id": 42051, "token": "\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142", "merges": "\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142 \u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142", "count": 5, "type": "remove by frequency"} -{"id": 28072, "token": "\u0120Palestinians", "merges": "\u0120Palestin ians", "count": 5, "type": "remove by frequency"} -{"id": 43140, "token": "transfected", "merges": "trans fected", "count": 5, "type": "remove by frequency"} -{"id": 41383, "token": "Appellants", "merges": "App ellants", "count": 5, "type": "remove by frequency"} -{"id": 32291, "token": "geqslant", "merges": "geq slant", "count": 5, "type": "remove by frequency"} -{"id": 8828, "token": "mathrsfs", "merges": "math rsfs", "count": 5, "type": "remove by frequency"} -{"id": 48367, "token": "}}^{{\\", "merges": "}}^{ {\\", "count": 5, "type": "remove by frequency"} -{"id": 41897, "token": "\u00c3\u0132\u00c2\u00b5\u00c3\u0133", "merges": "\u00c3\u0132\u00c2\u00b5 \u00c3\u0133", "count": 5, "type": "remove by frequency"} -{"id": 25352, "token": "\u0120$\\{\\", "merges": "\u0120$\\ {\\", "count": 5, "type": "remove by frequency"} -{"id": 10878, "token": "}.$$", "merges": "} .$$", "count": 5, "type": "remove by frequency"} -{"id": 47264, "token": "}-{\\", "merges": "}- {\\", "count": 5, "type": "remove by frequency"} -{"id": 49372, "token": "_{\\{", "merges": "_{\\ {", "count": 5, "type": "remove by frequency"} -{"id": 41625, "token": "\u00c2\u00b8\u00c2\u012f", "merges": "\u00c2\u00b8 \u00c2\u012f", "count": 5, "type": "remove by frequency"} -{"id": 34908, "token": "\u0120qRT", "merges": "\u0120q RT", "count": 5, "type": "remove by frequency"} -{"id": 38606, "token": "\u0120\\}$", "merges": "\u0120\\ }$", "count": 5, "type": "remove by frequency"} -{"id": 37982, "token": "![\\[", "merges": "![ \\[", "count": 5, "type": "remove by frequency"} -{"id": 44957, "token": "\u0120\u00c2\u00b5L", "merges": "\u0120\u00c2\u00b5 L", "count": 5, "type": "remove by frequency"} -{"id": 47389, "token": "~\\]", "merges": "~ \\]", "count": 5, "type": "remove by frequency"} -{"id": 48128, "token": "\u0120CONSEQUENTIAL", "merges": "\u0120CON SEQUENTIAL", "count": 6, "type": "remove by frequency"} -{"id": 49505, "token": "\u0120NEGLIGENCE", "merges": "\u0120NE GLIGENCE", "count": 6, "type": "remove by frequency"} -{"id": 39604, "token": "\u0120Kommission", "merges": "\u0120Kom mission", "count": 6, "type": "remove by frequency"} -{"id": 38747, "token": "cjwatson", "merges": "cj watson", "count": 6, "type": "remove by frequency"} -{"id": 42827, "token": "\u0120microM", "merges": "\u0120micro M", "count": 6, "type": "remove by frequency"} -{"id": 35937, "token": "}}}}$", "merges": "}} }}$", "count": 6, "type": "remove by frequency"} -{"id": 48201, "token": "=\"'$(", "merges": "=\"' $(", "count": 6, "type": "remove by frequency"} -{"id": 37944, "token": "\u0120/**<", "merges": "\u0120/** <", "count": 6, "type": "remove by frequency"} -{"id": 16769, "token": "\u0120\u00ce\u00bcM", "merges": "\u0120\u00ce\u00bc M", "count": 6, "type": "remove by frequency"} -{"id": 16489, "token": "),$$", "merges": "), $$", "count": 6, "type": "remove by frequency"} -{"id": 46201, "token": ")}/\\", "merges": ")} /\\", "count": 6, "type": "remove by frequency"} -{"id": 43266, "token": "/\u00e2\u0122\u012d", "merges": "/ \u00e2\u0122\u012d", "count": 6, "type": "remove by frequency"} -{"id": 22805, "token": "\\}$,", "merges": "\\ }$,", "count": 6, "type": "remove by frequency"} -{"id": 45122, "token": "\u0120p\u00c3\u0125", "merges": "\u0120p \u00c3\u0125", "count": 6, "type": "remove by frequency"} -{"id": 26325, "token": "\u0120,$$", "merges": "\u0120, $$", "count": 6, "type": "remove by frequency"} -{"id": 17281, "token": ")](#", "merges": ") ](#", "count": 6, "type": "remove by frequency"} -{"id": 18413, "token": "]^,", "merges": "]^ ,", "count": 6, "type": "remove by frequency"} -{"id": 12004, "token": "]{.", "merges": "]{ .", "count": 6, "type": "remove by frequency"} -{"id": 6043, "token": "\u0120Plaintiff", "merges": "\u0120Pl aintiff", "count": 7, "type": "remove by frequency"} -{"id": 37368, "token": "\u0120Pursuant", "merges": "\u0120P ursuant", "count": 7, "type": "remove by frequency"} -{"id": 45622, "token": "\u0120EEOC", "merges": "\u0120EE OC", "count": 7, "type": "remove by frequency"} -{"id": 35998, "token": "\u0120Rptr", "merges": "\u0120R ptr", "count": 7, "type": "remove by frequency"} -{"id": 32222, "token": "}},{\\", "merges": "}}, {\\", "count": 7, "type": "remove by frequency"} -{"id": 44740, "token": "\u0120/*!<", "merges": "\u0120/*! <", "count": 7, "type": "remove by frequency"} -{"id": 13270, "token": "MOESM", "merges": "MO ESM", "count": 7, "type": "remove by frequency"} -{"id": 26271, "token": "\u0120\u00c2\u00b5M", "merges": "\u0120\u00c2\u00b5 M", "count": 7, "type": "remove by frequency"} -{"id": 35869, "token": "]{}(", "merges": "]{} (", "count": 7, "type": "remove by frequency"} -{"id": 38400, "token": "}={\\", "merges": "}= {\\", "count": 7, "type": "remove by frequency"} -{"id": 49538, "token": "^*$,", "merges": "^* $,", "count": 7, "type": "remove by frequency"} -{"id": 39300, "token": "\u0120s\u00c3\u0125", "merges": "\u0120s \u00c3\u0125", "count": 7, "type": "remove by frequency"} -{"id": 47044, "token": "|$.", "merges": "| $.", "count": 7, "type": "remove by frequency"} -{"id": 53546, "token": "\u00e8\u0126\u00a3", "merges": "\u00e8\u0126 \u00a3", "count": 7, "type": "remove by frequency"} -{"id": 20587, "token": "\u00c2\u012d", "merges": "\u00c2 \u012d", "count": 7, "type": "remove by frequency"} -{"id": 48924, "token": "//----------------------------------------------------------------", "merges": "// ----------------------------------------------------------------", "count": 8, "type": "remove by frequency"} -{"id": 26939, "token": "\u0120\u00c2\u0142\u00c2\u0142\u0120\u00c2\u0142\u00c2\u0142\u0120\u00c2\u0142\u00c2\u0142\u0120\u00c2\u0142\u00c2\u0142", "merges": "\u0120\u00c2\u0142\u00c2\u0142\u0120\u00c2\u0142\u00c2\u0142 \u0120\u00c2\u0142\u00c2\u0142\u0120\u00c2\u0142\u00c2\u0142", "count": 8, "type": "remove by frequency"} -{"id": 50048, "token": "\u0120Congressman", "merges": "\u0120Congress man", "count": 8, "type": "remove by frequency"} -{"id": 39193, "token": "\u0120Obamacare", "merges": "\u0120Ob amacare", "count": 8, "type": "remove by frequency"} -{"id": 48346, "token": "\u0120Comiss\u00c3\u00a3o", "merges": "\u0120Comiss \u00c3\u00a3o", "count": 8, "type": "remove by frequency"} -{"id": 46900, "token": "\u0120Lebanese", "merges": "\u0120Leban ese", "count": 8, "type": "remove by frequency"} -{"id": 47810, "token": "\u0120ocks\u00c3\u00a5", "merges": "\u0120ocks \u00c3\u00a5", "count": 8, "type": "remove by frequency"} -{"id": 37980, "token": "\u00c3\u0132\u00c2\u00b5\u00c3\u0132", "merges": "\u00c3\u0132\u00c2\u00b5 \u00c3\u0132", "count": 8, "type": "remove by frequency"} -{"id": 45237, "token": "\u0120shRNA", "merges": "\u0120sh RNA", "count": 8, "type": "remove by frequency"} -{"id": 30145, "token": "}^{{\\", "merges": "}^{ {\\", "count": 8, "type": "remove by frequency"} -{"id": 28691, "token": ")\u00e2\u0122\u0135(", "merges": ")\u00e2\u0122\u0135 (", "count": 8, "type": "remove by frequency"} -{"id": 1656, "token": "]\\].", "merges": "]\\] .", "count": 8, "type": "remove by frequency"} -{"id": 22367, "token": "![**", "merges": "![ **", "count": 8, "type": "remove by frequency"} -{"id": 31357, "token": "\u0120$\\|", "merges": "\u0120$\\ |", "count": 8, "type": "remove by frequency"} -{"id": 31782, "token": "\u00c3\u00a4\u00c2\u00ba", "merges": "\u00c3\u00a4 \u00c2\u00ba", "count": 8, "type": "remove by frequency"} -{"id": 27872, "token": "\u0120ALJ", "merges": "\u0120AL J", "count": 8, "type": "remove by frequency"} -{"id": 14640, "token": "},$$", "merges": "}, $$", "count": 8, "type": "remove by frequency"} -{"id": 11971, "token": "}}$.", "merges": "}} $.", "count": 8, "type": "remove by frequency"} -{"id": 37192, "token": "\\,{\\", "merges": "\\, {\\", "count": 8, "type": "remove by frequency"} -{"id": 35017, "token": "\u0120\u00c3\u013b", "merges": "\u0120\u00c3 \u013b", "count": 8, "type": "remove by frequency"} -{"id": 51501, "token": "\u00e5\u00b7\u0138", "merges": "\u00e5\u00b7 \u0138", "count": 8, "type": "remove by frequency"} -{"id": 28372, "token": "\u00c2\u0130", "merges": "\u00c2 \u0130", "count": 8, "type": "remove by frequency"} -{"id": 38110, "token": "\u0120resuspended", "merges": "\u0120res uspended", "count": 9, "type": "remove by frequency"} -{"id": 28906, "token": "\u0120neutrophils", "merges": "\u0120neutroph ils", "count": 9, "type": "remove by frequency"} -{"id": 12254, "token": "\u0120Defendants", "merges": "\u0120Def endants", "count": 9, "type": "remove by frequency"} -{"id": 46187, "token": "\u0120Comisi\u00c3\u00b3n", "merges": "\u0120Com isi\u00c3\u00b3n", "count": 9, "type": "remove by frequency"} -{"id": 46733, "token": "\u0120Islamist", "merges": "\u0120Islam ist", "count": 9, "type": "remove by frequency"} -{"id": 32777, "token": "\u0120lysates", "merges": "\u0120lys ates", "count": 9, "type": "remove by frequency"} -{"id": 47596, "token": "vphantom", "merges": "v phantom", "count": 9, "type": "remove by frequency"} -{"id": 43215, "token": "\u0120curiam", "merges": "\u0120cur iam", "count": 9, "type": "remove by frequency"} -{"id": 45409, "token": "\u0120Sunni", "merges": "\u0120Sun ni", "count": 9, "type": "remove by frequency"} -{"id": 39601, "token": "\u0120ERISA", "merges": "\u0120ER ISA", "count": 9, "type": "remove by frequency"} -{"id": 43654, "token": "\\|_{\\", "merges": "\\| _{\\", "count": 9, "type": "remove by frequency"} -{"id": 48003, "token": "\u0120HepG", "merges": "\u0120Hep G", "count": 9, "type": "remove by frequency"} -{"id": 28185, "token": ")}$.", "merges": ") }$.", "count": 9, "type": "remove by frequency"} -{"id": 20940, "token": "\u0120_{\\", "merges": "\u0120_ {\\", "count": 9, "type": "remove by frequency"} -{"id": 40413, "token": "\u00c3\u0132\u00c2\u00ba", "merges": "\u00c3\u0132 \u00c2\u00ba", "count": 9, "type": "remove by frequency"} -{"id": 50334, "token": "\u00e2\u012d\u012a", "merges": "\u00e2\u012d \u012a", "count": 9, "type": "remove by frequency"} -{"id": 18455, "token": "\u00c2\u0128", "merges": "\u00c2 \u0128", "count": 9, "type": "remove by frequency"} -{"id": 40055, "token": "\u0120Conservatives", "merges": "\u0120Conserv atives", "count": 10, "type": "remove by frequency"} -{"id": 41163, "token": "micromachines", "merges": "microm achines", "count": 10, "type": "remove by frequency"} -{"id": 46066, "token": "\u0120Jacksonville", "merges": "\u0120Jackson ville", "count": 10, "type": "remove by frequency"} -{"id": 28107, "token": "Appellee", "merges": "App ellee", "count": 10, "type": "remove by frequency"} -{"id": 49065, "token": "\u0120Texans", "merges": "\u0120Tex ans", "count": 10, "type": "remove by frequency"} -{"id": 37087, "token": "medsc", "merges": "med sc", "count": 10, "type": "remove by frequency"} -{"id": 40608, "token": "}^{(\\", "merges": "}^{ (\\", "count": 10, "type": "remove by frequency"} -{"id": 45656, "token": "\u0120{{{\\", "merges": "\u0120{ {{\\", "count": 10, "type": "remove by frequency"} -{"id": 24255, "token": "&=&\\", "merges": "&= &\\", "count": 10, "type": "remove by frequency"} -{"id": 32477, "token": ".\u00e2\u0122\u012d", "merges": ". \u00e2\u0122\u012d", "count": 10, "type": "remove by frequency"} -{"id": 28503, "token": "}}\\,", "merges": "}}\\ ,", "count": 10, "type": "remove by frequency"} -{"id": 40130, "token": "\u0120f\u00c3\u0125", "merges": "\u0120f \u00c3\u0125", "count": 10, "type": "remove by frequency"} -{"id": 48817, "token": "}+{\\", "merges": "}+ {\\", "count": 10, "type": "remove by frequency"} -{"id": 53776, "token": "\u00e8\u0143\u0123", "merges": "\u00e8\u0143 \u0123", "count": 10, "type": "remove by frequency"} -{"id": 13854, "token": "\u00c2\u0126", "merges": "\u00c2 \u0126", "count": 10, "type": "remove by frequency"} -{"id": 31779, "token": "=\"../../../../../", "merges": "=\"../../../../ ../", "count": 11, "type": "remove by frequency"} -{"id": 38726, "token": "\u0120downregulation", "merges": "\u0120down regulation", "count": 11, "type": "remove by frequency"} -{"id": 43275, "token": "\u0120Numerous", "merges": "\u0120Numer ous", "count": 11, "type": "remove by frequency"} -{"id": 36805, "token": "\u0120oocytes", "merges": "\u0120o ocytes", "count": 11, "type": "remove by frequency"} -{"id": 8861, "token": "upgreek", "merges": "up greek", "count": 11, "type": "remove by frequency"} -{"id": 25349, "token": "))/((-", "merges": "))/( (-", "count": 11, "type": "remove by frequency"} -{"id": 11127, "token": ").$$", "merges": "). $$", "count": 11, "type": "remove by frequency"} -{"id": 19562, "token": "\\}$.", "merges": "\\ }$.", "count": 11, "type": "remove by frequency"} -{"id": 26136, "token": "}}_\\", "merges": "}} _\\", "count": 11, "type": "remove by frequency"} -{"id": 39096, "token": "\u0120mAb", "merges": "\u0120m Ab", "count": 11, "type": "remove by frequency"} -{"id": 2947, "token": "],[@", "merges": "], [@", "count": 11, "type": "remove by frequency"} -{"id": 2479, "token": "\u0120([@", "merges": "\u0120( [@", "count": 11, "type": "remove by frequency"} -{"id": 47702, "token": "\u0120PCa", "merges": "\u0120PC a", "count": 11, "type": "remove by frequency"} -{"id": 50319, "token": "\u00e2\u012a\u0141", "merges": "\u00e2\u012a \u0141", "count": 11, "type": "remove by frequency"} -{"id": 54272, "token": "\u00e9\u0139\u00a2", "merges": "\u00e9\u0139 \u00a2", "count": 11, "type": "remove by frequency"} -{"id": 47571, "token": "\u0120immunostaining", "merges": "\u0120immunost aining", "count": 12, "type": "remove by frequency"} -{"id": 8867, "token": "oddsidemargin", "merges": "odds idemargin", "count": 12, "type": "remove by frequency"} -{"id": 38651, "token": "xymatrix", "merges": "xym atrix", "count": 12, "type": "remove by frequency"} -{"id": 26712, "token": "\u0120Atty", "merges": "\u0120At ty", "count": 12, "type": "remove by frequency"} -{"id": 14434, "token": "\u0120\"\u00e2\u013b\u00aa", "merges": "\u0120\" \u00e2\u013b\u00aa", "count": 12, "type": "remove by frequency"} -{"id": 33795, "token": "\u0120HeLa", "merges": "\u0120He La", "count": 12, "type": "remove by frequency"} -{"id": 48138, "token": ")}}{\\", "merges": ") }}{\\", "count": 12, "type": "remove by frequency"} -{"id": 16302, "token": "}}}_", "merges": "}} }_", "count": 12, "type": "remove by frequency"} -{"id": 43638, "token": "$.\\", "merges": "$. \\", "count": 12, "type": "remove by frequency"} -{"id": 14716, "token": "\u0120\u00c2\u0139", "merges": "\u0120\u00c2 \u0139", "count": 12, "type": "remove by frequency"} -{"id": 45509, "token": "\"}^", "merges": "\"} ^", "count": 12, "type": "remove by frequency"} -{"id": 43657, "token": ";{\\", "merges": "; {\\", "count": 12, "type": "remove by frequency"} -{"id": 19819, "token": "\u00c2\u0133", "merges": "\u00c2 \u0133", "count": 12, "type": "remove by frequency"} -{"id": 30477, "token": "\u0120upregulation", "merges": "\u0120up regulation", "count": 13, "type": "remove by frequency"} -{"id": 45707, "token": "}}})$", "merges": "}}} )$", "count": 13, "type": "remove by frequency"} -{"id": 28073, "token": "}}^\\", "merges": "}} ^\\", "count": 13, "type": "remove by frequency"} -{"id": 37185, "token": "):=\\", "merges": "): =\\", "count": 13, "type": "remove by frequency"} -{"id": 42691, "token": "jcmm", "merges": "jc mm", "count": 13, "type": "remove by frequency"} -{"id": 35138, "token": "\u0120\\[*", "merges": "\u0120\\[ *", "count": 13, "type": "remove by frequency"} -{"id": 45472, "token": "]{})", "merges": "]{} )", "count": 13, "type": "remove by frequency"} -{"id": 35729, "token": "}}+\\", "merges": "}} +\\", "count": 13, "type": "remove by frequency"} -{"id": 49960, "token": "\u0120$(-", "merges": "\u0120$ (-", "count": 13, "type": "remove by frequency"} -{"id": 37860, "token": "/{\\", "merges": "/ {\\", "count": 13, "type": "remove by frequency"} -{"id": 33763, "token": "=\"../../../../../../", "merges": "=\"../../../../ ../../", "count": 14, "type": "remove by frequency"} -{"id": 34682, "token": "\u0120Redistributions", "merges": "\u0120Redist ributions", "count": 14, "type": "remove by frequency"} -{"id": 34122, "token": "\u0120downregulated", "merges": "\u0120down regulated", "count": 14, "type": "remove by frequency"} -{"id": 45233, "token": "hookrightarrow", "merges": "hook rightarrow", "count": 14, "type": "remove by frequency"} -{"id": 30960, "token": "\u0120luciferase", "merges": "\u0120luc iferase", "count": 14, "type": "remove by frequency"} -{"id": 8167, "token": "\u0120Defendant", "merges": "\u0120Def endant", "count": 14, "type": "remove by frequency"} -{"id": 46542, "token": "\u0120exosomes", "merges": "\u0120ex osomes", "count": 14, "type": "remove by frequency"} -{"id": 49169, "token": "GLIGENCE", "merges": "GL IGENCE", "count": 14, "type": "remove by frequency"} -{"id": 32625, "token": "\u0120Taliban", "merges": "\u0120Tal iban", "count": 14, "type": "remove by frequency"} -{"id": 40557, "token": "\u0120\u00cf\u012e\u00cf\u0126\u00ce\u00b9", "merges": "\u0120\u00cf\u012e \u00cf\u0126\u00ce\u00b9", "count": 14, "type": "remove by frequency"} -{"id": 37077, "token": ")\\].", "merges": ") \\].", "count": 14, "type": "remove by frequency"} -{"id": 17137, "token": "})$.", "merges": "} )$.", "count": 14, "type": "remove by frequency"} -{"id": 35655, "token": "\u0120BJP", "merges": "\u0120B JP", "count": 14, "type": "remove by frequency"} -{"id": 50331, "token": "\u00e2\u012b\u00aa", "merges": "\u00e2\u012b \u00aa", "count": 14, "type": "remove by frequency"} -{"id": 50592, "token": "\u00e4\u00be\u00b7", "merges": "\u00e4\u00be \u00b7", "count": 14, "type": "remove by frequency"} -{"id": 14681, "token": "\u00c2\u012a", "merges": "\u00c2 \u012a", "count": 14, "type": "remove by frequency"} -{"id": 20006, "token": "\u00c2\u013a", "merges": "\u00c2 \u013a", "count": 14, "type": "remove by frequency"} -{"id": 48687, "token": "\u0120findViewById", "merges": "\u0120find ViewById", "count": 15, "type": "remove by frequency"} -{"id": 44694, "token": "\u0120lysed", "merges": "\u0120lys ed", "count": 15, "type": "remove by frequency"} -{"id": 46880, "token": "\u0120///<", "merges": "\u0120/// <", "count": 15, "type": "remove by frequency"} -{"id": 35487, "token": "\\]\\].", "merges": "\\] \\].", "count": 15, "type": "remove by frequency"} -{"id": 32399, "token": "\u0120----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------", "merges": "\u0120 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------", "count": 16, "type": "remove by frequency"} -{"id": 45373, "token": "\u0120Australians", "merges": "\u0120Austral ians", "count": 16, "type": "remove by frequency"} -{"id": 43607, "token": "\u0120Respondents", "merges": "\u0120Respond ents", "count": 16, "type": "remove by frequency"} -{"id": 49950, "token": "\u0120CONCLUSION", "merges": "\u0120CON CLUSION", "count": 16, "type": "remove by frequency"} -{"id": 37402, "token": "imonit", "merges": "imon it", "count": 16, "type": "remove by frequency"} -{"id": 49819, "token": "hskip", "merges": "h skip", "count": 16, "type": "remove by frequency"} -{"id": 41780, "token": ")}=\\", "merges": ")} =\\", "count": 16, "type": "remove by frequency"} -{"id": 38229, "token": "\u0120n\u00c3\u0125", "merges": "\u0120n \u00c3\u0125", "count": 16, "type": "remove by frequency"} -{"id": 50327, "token": "\u00e2\u012b\u0134", "merges": "\u00e2\u012b \u0134", "count": 16, "type": "remove by frequency"} -{"id": 49806, "token": "]$$", "merges": "] $$", "count": 16, "type": "remove by frequency"} -{"id": 51628, "token": "\u00e5\u00bd\u00ab", "merges": "\u00e5\u00bd \u00ab", "count": 16, "type": "remove by frequency"} -{"id": 18994, "token": "\u00c2\u0129", "merges": "\u00c2 \u0129", "count": 16, "type": "remove by frequency"} -{"id": 24372, "token": "\u00c2\u012b", "merges": "\u00c2 \u012b", "count": 16, "type": "remove by frequency"} -{"id": 26261, "token": "\u00c2\u012c", "merges": "\u00c2 \u012c", "count": 16, "type": "remove by frequency"} -{"id": 46338, "token": "\u0120Sentencing", "merges": "\u0120Sent encing", "count": 17, "type": "remove by frequency"} -{"id": 43029, "token": "AFFIRMED", "merges": "A FFIRMED", "count": 17, "type": "remove by frequency"} -{"id": 28696, "token": "FFIRMED", "merges": "FFIR MED", "count": 17, "type": "remove by frequency"} -{"id": 49659, "token": "\u0120j\u00c3\u00a4sen", "merges": "\u0120j\u00c3\u00a4 sen", "count": 17, "type": "remove by frequency"} -{"id": 27790, "token": "}}_{{\\", "merges": "}} _{{\\", "count": 17, "type": "remove by frequency"} -{"id": 38225, "token": ")}_{\\", "merges": ")} _{\\", "count": 17, "type": "remove by frequency"} -{"id": 49406, "token": "\u0120\u00c3\u00aent", "merges": "\u0120\u00c3\u00ae nt", "count": 17, "type": "remove by frequency"} -{"id": 10791, "token": "}({\\", "merges": "}( {\\", "count": 17, "type": "remove by frequency"} -{"id": 39294, "token": ")={\\", "merges": ")= {\\", "count": 17, "type": "remove by frequency"} -{"id": 40227, "token": ")}$$", "merges": ") }$$", "count": 17, "type": "remove by frequency"} -{"id": 23125, "token": "pntd", "merges": "pnt d", "count": 17, "type": "remove by frequency"} -{"id": 43044, "token": "\u0120\u00c2\u0143", "merges": "\u0120\u00c2 \u0143", "count": 17, "type": "remove by frequency"} -{"id": 44701, "token": "\u0120overexpressed", "merges": "\u0120overex pressed", "count": 18, "type": "remove by frequency"} -{"id": 45359, "token": "\u0120Africans", "merges": "\u0120Afric ans", "count": 18, "type": "remove by frequency"} -{"id": 10131, "token": "\u0120uintptr", "merges": "\u0120uint ptr", "count": 18, "type": "remove by frequency"} -{"id": 41538, "token": "\u0120Corbyn", "merges": "\u0120Corb yn", "count": 18, "type": "remove by frequency"} -{"id": 45094, "token": "\u0120ocks", "merges": "\u0120o cks", "count": 18, "type": "remove by frequency"} -{"id": 29230, "token": "\u0120Figs", "merges": "\u0120Fig s", "count": 18, "type": "remove by frequency"} -{"id": 16990, "token": "\u00c3\u0125\u00c3\u0124", "merges": "\u00c3\u0125 \u00c3\u0124", "count": 18, "type": "remove by frequency"} -{"id": 18251, "token": "\u00c3\u0132\u00c2\u00b5", "merges": "\u00c3\u0132 \u00c2\u00b5", "count": 18, "type": "remove by frequency"} -{"id": 45662, "token": "\\]](", "merges": "\\] ](", "count": 18, "type": "remove by frequency"} -{"id": 21067, "token": "\u0120\u00c5\u0141i", "merges": "\u0120\u00c5\u0141 i", "count": 18, "type": "remove by frequency"} -{"id": 43742, "token": "})$$", "merges": "})$ $", "count": 18, "type": "remove by frequency"} -{"id": 34813, "token": "|_{\\", "merges": "| _{\\", "count": 18, "type": "remove by frequency"} -{"id": 28903, "token": "\u0120\u00ce\u00bcl", "merges": "\u0120\u00ce\u00bc l", "count": 18, "type": "remove by frequency"} -{"id": 22001, "token": "-{\\", "merges": "- {\\", "count": 18, "type": "remove by frequency"} -{"id": 36576, "token": "$}}", "merges": "$ }}", "count": 18, "type": "remove by frequency"} -{"id": 25371, "token": "\u00c2\u0138", "merges": "\u00c2 \u0138", "count": 18, "type": "remove by frequency"} -{"id": 17858, "token": "\u0120transfected", "merges": "\u0120trans fected", "count": 19, "type": "remove by frequency"} -{"id": 15707, "token": "\u0120]{}", "merges": "\u0120 ]{}", "count": 19, "type": "remove by frequency"} -{"id": 16746, "token": "({{\\", "merges": "( {{\\", "count": 19, "type": "remove by frequency"} -{"id": 34801, "token": "))$.", "merges": ") )$.", "count": 19, "type": "remove by frequency"} -{"id": 7610, "token": "ijms", "merges": "ij ms", "count": 19, "type": "remove by frequency"} -{"id": 20236, "token": "\u00c2\u013b", "merges": "\u00c2 \u013b", "count": 19, "type": "remove by frequency"} -{"id": 29881, "token": "\u0120centrifuged", "merges": "\u0120centrifug ed", "count": 20, "type": "remove by frequency"} -{"id": 38421, "token": "Defendants", "merges": "Def endants", "count": 20, "type": "remove by frequency"} -{"id": 19747, "token": "boldmath", "merges": "bold math", "count": 20, "type": "remove by frequency"} -{"id": 42795, "token": "\u0120Saddam", "merges": "\u0120S addam", "count": 20, "type": "remove by frequency"} -{"id": 41136, "token": "\u0120Qaeda", "merges": "\u0120Q aeda", "count": 20, "type": "remove by frequency"} -{"id": 45605, "token": "\u0120CXCR", "merges": "\u0120CX CR", "count": 20, "type": "remove by frequency"} -{"id": 38850, "token": "Bankr", "merges": "Bank r", "count": 20, "type": "remove by frequency"} -{"id": 27501, "token": "\u0120[]{", "merges": "\u0120[ ]{", "count": 20, "type": "remove by frequency"} -{"id": 40344, "token": "\u0120immunoblot", "merges": "\u0120immun oblot", "count": 21, "type": "remove by frequency"} -{"id": 44987, "token": "\u0120Fran\u00c3\u00a7ois", "merges": "\u0120Fran \u00c3\u00a7ois", "count": 21, "type": "remove by frequency"} -{"id": 32846, "token": "\u0120Canadians", "merges": "\u0120Canad ians", "count": 21, "type": "remove by frequency"} -{"id": 47732, "token": "\u0120Steelers", "merges": "\u0120Steel ers", "count": 21, "type": "remove by frequency"} -{"id": 49080, "token": "\u0120PROFITS", "merges": "\u0120PROF ITS", "count": 21, "type": "remove by frequency"} -{"id": 46795, "token": "\u0120Bosnia", "merges": "\u0120Bos nia", "count": 21, "type": "remove by frequency"} -{"id": 26071, "token": "Errorf", "merges": "Error f", "count": 21, "type": "remove by frequency"} -{"id": 28171, "token": "bigcup", "merges": "big cup", "count": 21, "type": "remove by frequency"} -{"id": 42643, "token": "_{|", "merges": "_{ |", "count": 21, "type": "remove by frequency"} -{"id": 19481, "token": "\u0120\\~", "merges": "\u0120\\ ~", "count": 21, "type": "remove by frequency"} -{"id": 52422, "token": "\u00e6\u00ae\u0143", "merges": "\u00e6\u00ae \u0143", "count": 21, "type": "remove by frequency"} -{"id": 35329, "token": "\u0120Conclusions", "merges": "\u0120Con clusions", "count": 22, "type": "remove by frequency"} -{"id": 49059, "token": "\u0120Attorneys", "merges": "\u0120Att orneys", "count": 22, "type": "remove by frequency"} -{"id": 34538, "token": "\u0120Pakistani", "merges": "\u0120Pak istani", "count": 22, "type": "remove by frequency"} -{"id": 45002, "token": "\u0120\u00cf\u0122\u00ce\u00bf\u00ce\u00bb", "merges": "\u0120\u00cf\u0122 \u00ce\u00bf\u00ce\u00bb", "count": 22, "type": "remove by frequency"} -{"id": 48443, "token": "\u0120eluted", "merges": "\u0120el uted", "count": 22, "type": "remove by frequency"} -{"id": 12945, "token": "}$$\\", "merges": "}$ $\\", "count": 22, "type": "remove by frequency"} -{"id": 24281, "token": "\u0120$-$", "merges": "\u0120$- $", "count": 22, "type": "remove by frequency"} -{"id": 43736, "token": "\u0120$<$", "merges": "\u0120$< $", "count": 22, "type": "remove by frequency"} -{"id": 34210, "token": "\u00c2\u0140", "merges": "\u00c2 \u0140", "count": 22, "type": "remove by frequency"} -{"id": 19326, "token": "scriptstyle", "merges": "script style", "count": 23, "type": "remove by frequency"} -{"id": 47536, "token": "\u0120adipocytes", "merges": "\u0120adip ocytes", "count": 23, "type": "remove by frequency"} -{"id": 35335, "token": "\u0120Parlement", "merges": "\u0120Par lement", "count": 23, "type": "remove by frequency"} -{"id": 33097, "token": "\u0120Catholics", "merges": "\u0120Cath olics", "count": 23, "type": "remove by frequency"} -{"id": 22049, "token": "\u0120\u00e2\u0122\u0135,", "merges": "\u0120\u00e2\u0122\u0135 ,", "count": 23, "type": "remove by frequency"} -{"id": 3951, "token": "^[@", "merges": "^ [@", "count": 23, "type": "remove by frequency"} -{"id": 46986, "token": "\\,$", "merges": "\\, $", "count": 23, "type": "remove by frequency"} -{"id": 50376, "token": "\u00e2\u0139\u0142", "merges": "\u00e2\u0139 \u0142", "count": 23, "type": "remove by frequency"} -{"id": 23002, "token": "+{\\", "merges": "+ {\\", "count": 23, "type": "remove by frequency"} -{"id": 17719, "token": "\u00c2\u0134", "merges": "\u00c2 \u0134", "count": 23, "type": "remove by frequency"} -{"id": 12555, "token": "\u00c2\u0139", "merges": "\u00c2 \u0139", "count": 23, "type": "remove by frequency"} -{"id": 37826, "token": "xrightarrow", "merges": "x rightarrow", "count": 24, "type": "remove by frequency"} -{"id": 50201, "token": "\u0120mycket", "merges": "\u0120my cket", "count": 24, "type": "remove by frequency"} -{"id": 32885, "token": "\u0120Hamas", "merges": "\u0120Ham as", "count": 24, "type": "remove by frequency"} -{"id": 32673, "token": "}}^{(", "merges": "}} ^{(", "count": 24, "type": "remove by frequency"} -{"id": 47260, "token": "\u0120}^{", "merges": "\u0120} ^{", "count": 24, "type": "remove by frequency"} -{"id": 37299, "token": "}\\}$", "merges": "}\\ }$", "count": 24, "type": "remove by frequency"} -{"id": 44454, "token": "\u0120JNK", "merges": "\u0120J NK", "count": 24, "type": "remove by frequency"} -{"id": 50807, "token": "\u00e5\u012b\u00b7", "merges": "\u00e5\u012b \u00b7", "count": 24, "type": "remove by frequency"} -{"id": 8562, "token": "]^.", "merges": "]^ .", "count": 24, "type": "remove by frequency"} -{"id": 43009, "token": "\u0120proinflammatory", "merges": "\u0120pro inflammatory", "count": 25, "type": "remove by frequency"} -{"id": 33053, "token": "biomolecules", "merges": "biom olecules", "count": 25, "type": "remove by frequency"} -{"id": 41172, "token": "\u0120Liberals", "merges": "\u0120Liber als", "count": 25, "type": "remove by frequency"} -{"id": 49248, "token": "\u0120TEXAS", "merges": "\u0120TEX AS", "count": 25, "type": "remove by frequency"} -{"id": 26398, "token": "_{(\\", "merges": "_{ (\\", "count": 25, "type": "remove by frequency"} -{"id": 49793, "token": "-\u00e2\u0124\u00ac", "merges": "- \u00e2\u0124\u00ac", "count": 25, "type": "remove by frequency"} -{"id": 24680, "token": "\u0120upregulated", "merges": "\u0120up regulated", "count": 26, "type": "remove by frequency"} -{"id": 30079, "token": "})=\\", "merges": "}) =\\", "count": 26, "type": "remove by frequency"} -{"id": 19181, "token": "]{}.", "merges": "]{} .", "count": 26, "type": "remove by frequency"} -{"id": 13704, "token": "\u0120\u00c2\u0142\u00c2\u0142\u0120\u00c2\u0142\u00c2\u0142", "merges": "\u0120\u00c2\u0142\u00c2\u0142 \u0120\u00c2\u0142\u00c2\u0142", "count": 27, "type": "remove by frequency"} -{"id": 33682, "token": "\u0120Appellate", "merges": "\u0120App ellate", "count": 27, "type": "remove by frequency"} -{"id": 36645, "token": "Plaintiffs", "merges": "Plaintiff s", "count": 27, "type": "remove by frequency"} -{"id": 47654, "token": "\u0120Statutes", "merges": "\u0120Stat utes", "count": 27, "type": "remove by frequency"} -{"id": 42685, "token": "\u0120europea", "merges": "\u0120europe a", "count": 27, "type": "remove by frequency"} -{"id": 18866, "token": ",\\,\\", "merges": ",\\ ,\\", "count": 27, "type": "remove by frequency"} -{"id": 6334, "token": ".[@", "merges": ". [@", "count": 27, "type": "remove by frequency"} -{"id": 50295, "token": "\u00e2\u0122\u00a5", "merges": "\u00e2\u0122 \u00a5", "count": 27, "type": "remove by frequency"} -{"id": 16605, "token": "\u00c2\u013e", "merges": "\u00c2 \u013e", "count": 27, "type": "remove by frequency"} -{"id": 17237, "token": "\u00c2\u0137", "merges": "\u00c2 \u0137", "count": 27, "type": "remove by frequency"} -{"id": 46822, "token": "\u0120AMPK", "merges": "\u0120AMP K", "count": 28, "type": "remove by frequency"} -{"id": 42762, "token": "\u0120CXCL", "merges": "\u0120CX CL", "count": 28, "type": "remove by frequency"} -{"id": 46479, "token": "\u0120\\|_{", "merges": "\u0120\\| _{", "count": 28, "type": "remove by frequency"} -{"id": 33570, "token": "\u0120Ariz", "merges": "\u0120Ari z", "count": 28, "type": "remove by frequency"} -{"id": 34942, "token": "}$).", "merges": "}$ ).", "count": 28, "type": "remove by frequency"} -{"id": 42274, "token": "]{}[", "merges": "]{} [", "count": 28, "type": "remove by frequency"} -{"id": 49760, "token": "IRQHandler", "merges": "IRQ Handler", "count": 29, "type": "remove by frequency"} -{"id": 38481, "token": "\u0120integrin", "merges": "\u0120integr in", "count": 29, "type": "remove by frequency"} -{"id": 48738, "token": "\u00e2\u0122\u0125\u00e2\u0122\u0125\u00e2\u0122\u0125", "merges": "\u00e2\u0122\u0125\u00e2\u0122\u0125 \u00e2\u0122\u0125", "count": 29, "type": "remove by frequency"} -{"id": 31667, "token": "\\}_{", "merges": "\\ }_{", "count": 29, "type": "remove by frequency"} -{"id": 50791, "token": "\u00e5\u012b\u012b", "merges": "\u00e5\u012b \u012b", "count": 29, "type": "remove by frequency"} -{"id": 41346, "token": "\u0120neurodegenerative", "merges": "\u0120neurode generative", "count": 30, "type": "remove by frequency"} -{"id": 46527, "token": "\u0120Magistrate", "merges": "\u0120Mag istrate", "count": 30, "type": "remove by frequency"} -{"id": 47351, "token": "\u0120chemokine", "merges": "\u0120chemok ine", "count": 30, "type": "remove by frequency"} -{"id": 47598, "token": "\u0120Kavanaugh", "merges": "\u0120K avanaugh", "count": 30, "type": "remove by frequency"} -{"id": 20866, "token": "leqslant", "merges": "leq slant", "count": 30, "type": "remove by frequency"} -{"id": 47766, "token": "\u0120Kerala", "merges": "\u0120K erala", "count": 30, "type": "remove by frequency"} -{"id": 46034, "token": "\u0120RNase", "merges": "\u0120RN ase", "count": 30, "type": "remove by frequency"} -{"id": 39939, "token": "\u0120h\u00c3\u00a4n", "merges": "\u0120h \u00c3\u00a4n", "count": 30, "type": "remove by frequency"} -{"id": 17334, "token": "\u0120$|\\", "merges": "\u0120$ |\\", "count": 30, "type": "remove by frequency"} -{"id": 42637, "token": "}}-\\", "merges": "}} -\\", "count": 30, "type": "remove by frequency"} -{"id": 39106, "token": "}^+", "merges": "}^ +", "count": 30, "type": "remove by frequency"} -{"id": 21837, "token": "|{\\", "merges": "| {\\", "count": 30, "type": "remove by frequency"} -{"id": 43748, "token": "\u0120Caucasian", "merges": "\u0120Cauc asian", "count": 31, "type": "remove by frequency"} -{"id": 42173, "token": "\u0120Amended", "merges": "\u0120Am ended", "count": 31, "type": "remove by frequency"} -{"id": 46699, "token": "\u0120Somalia", "merges": "\u0120Somal ia", "count": 31, "type": "remove by frequency"} -{"id": 24923, "token": "\u0120Iraqi", "merges": "\u0120Iraq i", "count": 31, "type": "remove by frequency"} -{"id": 32603, "token": "textup", "merges": "text up", "count": 31, "type": "remove by frequency"} -{"id": 39004, "token": "\u0120Arabs", "merges": "\u0120Ar abs", "count": 31, "type": "remove by frequency"} -{"id": 17738, "token": "}_{{\\", "merges": "}_{ {\\", "count": 31, "type": "remove by frequency"} -{"id": 47402, "token": "})^{\\", "merges": "}) ^{\\", "count": 31, "type": "remove by frequency"} -{"id": 30634, "token": ")_{\\", "merges": ") _{\\", "count": 31, "type": "remove by frequency"} -{"id": 2367, "token": "\u0120${\\", "merges": "\u0120$ {\\", "count": 31, "type": "remove by frequency"} -{"id": 25778, "token": "\u0120\u00c3\u0130", "merges": "\u0120\u00c3 \u0130", "count": 31, "type": "remove by frequency"} -{"id": 13033, "token": "\u0120\u00c3\u0132", "merges": "\u0120\u00c3 \u0132", "count": 31, "type": "remove by frequency"} -{"id": 11400, "token": ",{\\", "merges": ", {\\", "count": 31, "type": "remove by frequency"} -{"id": 40598, "token": "\u0120Schr\u00c3\u00b6dinger", "merges": "\u0120Schr \u00c3\u00b6dinger", "count": 32, "type": "remove by frequency"} -{"id": 33041, "token": "\u0120Thereafter", "merges": "\u0120There after", "count": 32, "type": "remove by frequency"} -{"id": 36325, "token": "\u0120immunost", "merges": "\u0120immun ost", "count": 32, "type": "remove by frequency"} -{"id": 46142, "token": "\u0120luminal", "merges": "\u0120lum inal", "count": 32, "type": "remove by frequency"} -{"id": 49405, "token": "\u00c3\u00a9sident", "merges": "\u00c3\u00a9s ident", "count": 32, "type": "remove by frequency"} -{"id": 42680, "token": "\u0120Comiss", "merges": "\u0120Com iss", "count": 32, "type": "remove by frequency"} -{"id": 35496, "token": "**--**", "merges": "** --**", "count": 32, "type": "remove by frequency"} -{"id": 29748, "token": "}}{{\\", "merges": "}} {{\\", "count": 32, "type": "remove by frequency"} -{"id": 49408, "token": "\u0120RPMI", "merges": "\u0120RP MI", "count": 32, "type": "remove by frequency"} -{"id": 39099, "token": "\u0120HDAC", "merges": "\u0120HD AC", "count": 32, "type": "remove by frequency"} -{"id": 40125, "token": "]^{\\", "merges": "] ^{\\", "count": 32, "type": "remove by frequency"} -{"id": 45637, "token": "\u0120(\\#", "merges": "\u0120(\\ #", "count": 32, "type": "remove by frequency"} -{"id": 50332, "token": "\u00e2\u012b\u00ab", "merges": "\u00e2\u012b \u00ab", "count": 32, "type": "remove by frequency"} -{"id": 50339, "token": "\u00e2\u0136\u0128", "merges": "\u00e2\u0136 \u0128", "count": 32, "type": "remove by frequency"} -{"id": 36929, "token": "\u0120malignancies", "merges": "\u0120malign ancies", "count": 33, "type": "remove by frequency"} -{"id": 42624, "token": "\u0120Archbishop", "merges": "\u0120Arch bishop", "count": 33, "type": "remove by frequency"} -{"id": 34045, "token": "\u0120skulle", "merges": "\u0120sk ulle", "count": 33, "type": "remove by frequency"} -{"id": 38220, "token": "\u0120Assad", "merges": "\u0120Ass ad", "count": 33, "type": "remove by frequency"} -{"id": 49869, "token": "\u0120----,", "merges": "\u0120---- ,", "count": 33, "type": "remove by frequency"} -{"id": 48963, "token": ")|$(", "merges": ")| $(", "count": 33, "type": "remove by frequency"} -{"id": 20744, "token": ";\\;\\", "merges": ";\\ ;\\", "count": 33, "type": "remove by frequency"} -{"id": 14082, "token": "\u0120&=&", "merges": "\u0120&= &", "count": 33, "type": "remove by frequency"} -{"id": 28599, "token": "\u0120\u00c2\u00bb,", "merges": "\u0120\u00c2\u00bb ,", "count": 33, "type": "remove by frequency"} -{"id": 16550, "token": ")$-", "merges": ")$ -", "count": 33, "type": "remove by frequency"} -{"id": 15728, "token": "}$-", "merges": "}$ -", "count": 33, "type": "remove by frequency"} -{"id": 33671, "token": "\u0120Europeans", "merges": "\u0120Europe ans", "count": 34, "type": "remove by frequency"} -{"id": 39734, "token": "\u0120cytosolic", "merges": "\u0120cytos olic", "count": 34, "type": "remove by frequency"} -{"id": 10809, "token": "\u0120$\\{", "merges": "\u0120$\\ {", "count": 34, "type": "remove by frequency"} -{"id": 35386, "token": "Rptr", "merges": "R ptr", "count": 34, "type": "remove by frequency"} -{"id": 50381, "token": "\u00e2\u013a\u00bc", "merges": "\u00e2\u013a \u00bc", "count": 34, "type": "remove by frequency"} -{"id": 54390, "token": "\u00e9\u013e\u0133", "merges": "\u00e9\u013e \u0133", "count": 34, "type": "remove by frequency"} -{"id": 36889, "token": "\u0120immunoprecip", "merges": "\u0120immun oprecip", "count": 35, "type": "remove by frequency"} -{"id": 16492, "token": "\u0120Muslims", "merges": "\u0120Muslim s", "count": 35, "type": "remove by frequency"} -{"id": 36985, "token": "bigoplus", "merges": "big oplus", "count": 35, "type": "remove by frequency"} -{"id": 38414, "token": ")}}\\", "merges": ") }}\\", "count": 35, "type": "remove by frequency"} -{"id": 52691, "token": "\u00e6\u00ba\u00bc", "merges": "\u00e6\u00ba \u00bc", "count": 35, "type": "remove by frequency"} -{"id": 20481, "token": "]$.", "merges": "] $.", "count": 35, "type": "remove by frequency"} -{"id": 52959, "token": "\u00e7\u013b\u00a1", "merges": "\u00e7\u013b \u00a1", "count": 35, "type": "remove by frequency"} -{"id": 37805, "token": "\u0120Charleston", "merges": "\u0120Charl eston", "count": 36, "type": "remove by frequency"} -{"id": 39094, "token": "\u0120Netanyahu", "merges": "\u0120Net anyahu", "count": 36, "type": "remove by frequency"} -{"id": 29762, "token": "fasterxml", "merges": "faster xml", "count": 36, "type": "remove by frequency"} -{"id": 41340, "token": "\u0120Kurdish", "merges": "\u0120Kurd ish", "count": 36, "type": "remove by frequency"} -{"id": 45962, "token": "\u0120belang", "merges": "\u0120bel ang", "count": 36, "type": "remove by frequency"} -{"id": 44918, "token": "\u0120my\u00c3\u00b6s", "merges": "\u0120my \u00c3\u00b6s", "count": 36, "type": "remove by frequency"} -{"id": 13093, "token": "\\!\\!", "merges": "\\! \\!", "count": 36, "type": "remove by frequency"} -{"id": 49694, "token": "\u0120([*", "merges": "\u0120([ *", "count": 36, "type": "remove by frequency"} -{"id": 47942, "token": "^*(\\", "merges": "^* (\\", "count": 36, "type": "remove by frequency"} -{"id": 19022, "token": "^{{\\", "merges": "^{ {\\", "count": 36, "type": "remove by frequency"} -{"id": 27618, "token": "[]$", "merges": "[] $", "count": 36, "type": "remove by frequency"} -{"id": 33541, "token": "\u00c5\u00a3i", "merges": "\u00c5\u00a3 i", "count": 36, "type": "remove by frequency"} -{"id": 4253, "token": "){#", "merges": "){ #", "count": 36, "type": "remove by frequency"} -{"id": 27399, "token": "\u00c2\u0135", "merges": "\u00c2 \u0135", "count": 36, "type": "remove by frequency"} -{"id": 49920, "token": "\u0120Personally", "merges": "\u0120Person ally", "count": 37, "type": "remove by frequency"} -{"id": 39492, "token": "\u0120Louisville", "merges": "\u0120Louis ville", "count": 37, "type": "remove by frequency"} -{"id": 39267, "token": "omitempty", "merges": "omit empty", "count": 37, "type": "remove by frequency"} -{"id": 39537, "token": ".\u00e2\u0122\u013f).", "merges": ".\u00e2\u0122\u013f ).", "count": 37, "type": "remove by frequency"} -{"id": 22520, "token": "\u0120\u00c3\u0133", "merges": "\u0120\u00c3 \u0133", "count": 37, "type": "remove by frequency"} -{"id": 53900, "token": "\u00e8\u00b1\u0136", "merges": "\u00e8\u00b1 \u0136", "count": 37, "type": "remove by frequency"} -{"id": 27212, "token": "\\%$", "merges": "\\% $", "count": 37, "type": "remove by frequency"} -{"id": 13348, "token": "\u00c2\u012e", "merges": "\u00c2 \u012e", "count": 37, "type": "remove by frequency"} -{"id": 48642, "token": "\u0120Punjab", "merges": "\u0120Pun jab", "count": 38, "type": "remove by frequency"} -{"id": 38560, "token": "ORANDUM", "merges": "ORAND UM", "count": 38, "type": "remove by frequency"} -{"id": 46005, "token": "\u0120Rabbi", "merges": "\u0120Rab bi", "count": 38, "type": "remove by frequency"} -{"id": 44766, "token": "}}}=", "merges": "}} }=", "count": 38, "type": "remove by frequency"} -{"id": 41284, "token": "/((-", "merges": "/( (-", "count": 38, "type": "remove by frequency"} -{"id": 19911, "token": "\u0120overexpression", "merges": "\u0120overex pression", "count": 39, "type": "remove by frequency"} -{"id": 39056, "token": "\u0120Fourteenth", "merges": "\u0120Four teenth", "count": 39, "type": "remove by frequency"} -{"id": 45207, "token": "\u0120Nigerian", "merges": "\u0120Niger ian", "count": 39, "type": "remove by frequency"} -{"id": 48046, "token": "\u0120Winnipeg", "merges": "\u0120Winn ipeg", "count": 39, "type": "remove by frequency"} -{"id": 49971, "token": "\u0120Anglican", "merges": "\u0120Anglic an", "count": 39, "type": "remove by frequency"} -{"id": 50028, "token": "\u0120Eleventh", "merges": "\u0120Ele venth", "count": 39, "type": "remove by frequency"} -{"id": 35775, "token": "\u0120Shortly", "merges": "\u0120Short ly", "count": 39, "type": "remove by frequency"} -{"id": 46555, "token": "\u0120Azerba", "merges": "\u0120A zerba", "count": 39, "type": "remove by frequency"} -{"id": 46099, "token": "\u0120JHEP", "merges": "\u0120J HEP", "count": 39, "type": "remove by frequency"} -{"id": 45868, "token": "\u0120DEGs", "merges": "\u0120DE Gs", "count": 39, "type": "remove by frequency"} -{"id": 37075, "token": "RSOS", "merges": "RS OS", "count": 39, "type": "remove by frequency"} -{"id": 46938, "token": "\u0120}_", "merges": "\u0120} _", "count": 39, "type": "remove by frequency"} -{"id": 24943, "token": "{$\\", "merges": "{ $\\", "count": 39, "type": "remove by frequency"} -{"id": 48776, "token": "\u0120doxorubicin", "merges": "\u0120doxor ubicin", "count": 40, "type": "remove by frequency"} -{"id": 40198, "token": "\u0120millilitres", "merges": "\u0120millilit res", "count": 40, "type": "remove by frequency"} -{"id": 48665, "token": "\u0120Arabidopsis", "merges": "\u0120Arab idopsis", "count": 40, "type": "remove by frequency"} -{"id": 24794, "token": "\u0120Russians", "merges": "\u0120Russ ians", "count": 40, "type": "remove by frequency"} -{"id": 33857, "token": "mathds", "merges": "math ds", "count": 40, "type": "remove by frequency"} -{"id": 39851, "token": "textsf", "merges": "text sf", "count": 40, "type": "remove by frequency"} -{"id": 47851, "token": "\u0120\u00ce\u00b5\u00ce\u00bd", "merges": "\u0120\u00ce\u00b5 \u00ce\u00bd", "count": 40, "type": "remove by frequency"} -{"id": 45176, "token": "\u0120\u00c3\u012bt", "merges": "\u0120\u00c3\u012b t", "count": 40, "type": "remove by frequency"} -{"id": 8001, "token": "\\]).", "merges": "\\] ).", "count": 40, "type": "remove by frequency"} -{"id": 23377, "token": "\u0120\u00e2\u012a\u00bc", "merges": "\u0120\u00e2\u012a \u00bc", "count": 40, "type": "remove by frequency"} -{"id": 48074, "token": "\u0120<%=", "merges": "\u0120< %=", "count": 40, "type": "remove by frequency"} -{"id": 37596, "token": "\\]-", "merges": "\\] -", "count": 40, "type": "remove by frequency"} -{"id": 36444, "token": "\u0120\u00c2\u00bc", "merges": "\u0120\u00c2 \u00bc", "count": 40, "type": "remove by frequency"} -{"id": 44698, "token": "\u0120Superintendent", "merges": "\u0120Super intendent", "count": 41, "type": "remove by frequency"} -{"id": 11885, "token": "\u0120Republicans", "merges": "\u0120Republic ans", "count": 41, "type": "remove by frequency"} -{"id": 42053, "token": "\u0120Aboriginal", "merges": "\u0120Abor iginal", "count": 41, "type": "remove by frequency"} -{"id": 45613, "token": "\u0120bioactive", "merges": "\u0120bio active", "count": 41, "type": "remove by frequency"} -{"id": 47637, "token": "\u0120Argentine", "merges": "\u0120Argent ine", "count": 41, "type": "remove by frequency"} -{"id": 38581, "token": "\u0120Zimbabwe", "merges": "\u0120Z imbabwe", "count": 41, "type": "remove by frequency"} -{"id": 23386, "token": "stackrel", "merges": "stack rel", "count": 41, "type": "remove by frequency"} -{"id": 49433, "token": "\u0120Auburn", "merges": "\u0120Aub urn", "count": 41, "type": "remove by frequency"} -{"id": 22845, "token": "\u0120Brexit", "merges": "\u0120Bre xit", "count": 41, "type": "remove by frequency"} -{"id": 45609, "token": "zitter", "merges": "z itter", "count": 41, "type": "remove by frequency"} -{"id": 41101, "token": "\u0120Comey", "merges": "\u0120Come y", "count": 41, "type": "remove by frequency"} -{"id": 20485, "token": "\u0120\u00c2\u00a7\u00c2\u00a7", "merges": "\u0120\u00c2\u00a7 \u00c2\u00a7", "count": 41, "type": "remove by frequency"} -{"id": 37329, "token": "\u0120$-\\", "merges": "\u0120$ -\\", "count": 41, "type": "remove by frequency"} -{"id": 22762, "token": "\u0120\u00c2\u00b5g", "merges": "\u0120\u00c2\u00b5 g", "count": 41, "type": "remove by frequency"} -{"id": 42409, "token": "\u0120Senators", "merges": "\u0120Sen ators", "count": 42, "type": "remove by frequency"} -{"id": 27412, "token": "\u0120Yemen", "merges": "\u0120Y emen", "count": 42, "type": "remove by frequency"} -{"id": 34273, "token": "}}^{-", "merges": "}} ^{-", "count": 42, "type": "remove by frequency"} -{"id": 17092, "token": "\\|_{", "merges": "\\| _{", "count": 42, "type": "remove by frequency"} -{"id": 26637, "token": "\u0120^{\\", "merges": "\u0120 ^{\\", "count": 42, "type": "remove by frequency"} -{"id": 41449, "token": ")[$", "merges": ")[ $", "count": 42, "type": "remove by frequency"} -{"id": 42308, "token": "''$", "merges": "'' $", "count": 42, "type": "remove by frequency"} -{"id": 10778, "token": "\u00c2\u0125", "merges": "\u00c2 \u0125", "count": 42, "type": "remove by frequency"} -{"id": 16815, "token": "=\"../../../../", "merges": "=\"../../ ../../", "count": 43, "type": "remove by frequency"} -{"id": 48251, "token": "\u0120monocyte", "merges": "\u0120mon ocyte", "count": 43, "type": "remove by frequency"} -{"id": 48644, "token": "\u0120lncRNAs", "merges": "\u0120lnc RNAs", "count": 43, "type": "remove by frequency"} -{"id": 46885, "token": "\u0120Kremlin", "merges": "\u0120Kre mlin", "count": 43, "type": "remove by frequency"} -{"id": 8834, "token": "amsfonts", "merges": "ams fonts", "count": 43, "type": "remove by frequency"} -{"id": 31205, "token": "\u0120Tampa", "merges": "\u0120T ampa", "count": 43, "type": "remove by frequency"} -{"id": 44674, "token": "\u0120hatte", "merges": "\u0120hat te", "count": 43, "type": "remove by frequency"} -{"id": 40415, "token": "\u0120ogs\u00c3\u00a5", "merges": "\u0120o gs\u00c3\u00a5", "count": 43, "type": "remove by frequency"} -{"id": 39427, "token": "gs\u00c3\u00a5", "merges": "gs \u00c3\u00a5", "count": 43, "type": "remove by frequency"} -{"id": 35762, "token": "\u0120\u00ef\u00bb\u00bf", "merges": "\u0120\u00ef \u00bb\u00bf", "count": 43, "type": "remove by frequency"} -{"id": 28247, "token": "\u0120\\,\\", "merges": "\u0120\\ ,\\", "count": 43, "type": "remove by frequency"} -{"id": 34398, "token": "\u0120$[\\", "merges": "\u0120$ [\\", "count": 43, "type": "remove by frequency"} -{"id": 35990, "token": "\u00e2\u0122\u0124", "merges": "\u00e2\u0122 \u0124", "count": 43, "type": "remove by frequency"} -{"id": 15802, "token": "\u00c2\u0136", "merges": "\u00c2 \u0136", "count": 43, "type": "remove by frequency"} -{"id": 45195, "token": "overrightarrow", "merges": "over rightarrow", "count": 44, "type": "remove by frequency"} -{"id": 44156, "token": "\u0120Surprisingly", "merges": "\u0120Sur prisingly", "count": 44, "type": "remove by frequency"} -{"id": 47840, "token": "\u0120Qu\u00c3\u00a9", "merges": "\u0120Qu \u00c3\u00a9", "count": 44, "type": "remove by frequency"} -{"id": 30848, "token": "}}$$", "merges": "}} $$", "count": 44, "type": "remove by frequency"} -{"id": 39829, "token": "})}\\", "merges": "}) }\\", "count": 44, "type": "remove by frequency"} -{"id": 31828, "token": "\u0120\u00e0\u00aa", "merges": "\u0120 \u00e0\u00aa", "count": 44, "type": "remove by frequency"} -{"id": 46658, "token": "\"}*", "merges": "\"} *", "count": 44, "type": "remove by frequency"} -{"id": 37067, "token": "\u0120Honorable", "merges": "\u0120Hon orable", "count": 45, "type": "remove by frequency"} -{"id": 21113, "token": "\u0120appellee", "merges": "\u0120app ellee", "count": 45, "type": "remove by frequency"} -{"id": 33512, "token": "\u0120NSCLC", "merges": "\u0120NS CLC", "count": 45, "type": "remove by frequency"} -{"id": 14024, "token": "}}_{\\", "merges": "}} _{\\", "count": 45, "type": "remove by frequency"} -{"id": 50302, "token": "\u00e2\u0123\u00b1", "merges": "\u00e2\u0123 \u00b1", "count": 45, "type": "remove by frequency"} -{"id": 42929, "token": "-$\\", "merges": "- $\\", "count": 45, "type": "remove by frequency"} -{"id": 12836, "token": "\u00e2\u0122\u0127", "merges": "\u00e2\u0122 \u0127", "count": 45, "type": "remove by frequency"} -{"id": 46849, "token": "^+(", "merges": "^+ (", "count": 45, "type": "remove by frequency"} -{"id": 13380, "token": "\\_[", "merges": "\\_ [", "count": 45, "type": "remove by frequency"} -{"id": 28786, "token": "\u0120Throughout", "merges": "\u0120Through out", "count": 46, "type": "remove by frequency"} -{"id": 42422, "token": "\u0120Panthers", "merges": "\u0120Pant hers", "count": 46, "type": "remove by frequency"} -{"id": 11861, "token": "subseteq", "merges": "subset eq", "count": 46, "type": "remove by frequency"} -{"id": 24297, "token": "\u0120Corollary", "merges": "\u0120Cor ollary", "count": 47, "type": "remove by frequency"} -{"id": 39769, "token": "\u0120Debtors", "merges": "\u0120Debt ors", "count": 47, "type": "remove by frequency"} -{"id": 29914, "token": "Bigr", "merges": "Big r", "count": 47, "type": "remove by frequency"} -{"id": 42613, "token": "\u0120Broncos", "merges": "\u0120Bron cos", "count": 48, "type": "remove by frequency"} -{"id": 39699, "token": "\u0120Browns", "merges": "\u0120Brow ns", "count": 48, "type": "remove by frequency"} -{"id": 33406, "token": "\u0120Nazis", "merges": "\u0120Naz is", "count": 48, "type": "remove by frequency"} -{"id": 18239, "token": "\u0120(\u00e2\u012a\u0134", "merges": "\u0120( \u00e2\u012a\u0134", "count": 48, "type": "remove by frequency"} -{"id": 32220, "token": "\u0120MSCs", "merges": "\u0120M SCs", "count": 48, "type": "remove by frequency"} -{"id": 44327, "token": "\u0120(\u00e2\u012b\u00a5", "merges": "\u0120( \u00e2\u012b\u00a5", "count": 48, "type": "remove by frequency"} -{"id": 16986, "token": ")/((-", "merges": ")/( (-", "count": 48, "type": "remove by frequency"} -{"id": 35623, "token": "\u0120Indigenous", "merges": "\u0120Ind igenous", "count": 49, "type": "remove by frequency"} -{"id": 39759, "token": "\u0120Baghdad", "merges": "\u0120Bag hdad", "count": 49, "type": "remove by frequency"} -{"id": 30174, "token": "\u0120Libya", "merges": "\u0120Lib ya", "count": 49, "type": "remove by frequency"} -{"id": 46855, "token": "\u0120Torah", "merges": "\u0120Tor ah", "count": 49, "type": "remove by frequency"} -{"id": 41065, "token": "\u0120DMEM", "merges": "\u0120DM EM", "count": 49, "type": "remove by frequency"} -{"id": 49043, "token": "\u0120\u00d8\u00a3\u00d9\u0128", "merges": "\u0120\u00d8\u00a3 \u00d9\u0128", "count": 49, "type": "remove by frequency"} -{"id": 6006, "token": "^\u00e2\u012a\u0134", "merges": "^ \u00e2\u012a\u0134", "count": 49, "type": "remove by frequency"} -{"id": 33701, "token": "\u0120*/,", "merges": "\u0120*/ ,", "count": 49, "type": "remove by frequency"} -{"id": 30984, "token": "\u0120}_{", "merges": "\u0120} _{", "count": 49, "type": "remove by frequency"} -{"id": 46265, "token": "[$\\", "merges": "[ $\\", "count": 49, "type": "remove by frequency"} -{"id": 43227, "token": "\u0120HOLDERS", "merges": "\u0120HOLD ERS", "count": 50, "type": "remove by frequency"} -{"id": 35699, "token": "\u0120Newark", "merges": "\u0120New ark", "count": 50, "type": "remove by frequency"} -{"id": 31855, "token": "\u0120qPCR", "merges": "\u0120q PCR", "count": 50, "type": "remove by frequency"} -{"id": 48457, "token": "\u0120FACS", "merges": "\u0120F ACS", "count": 50, "type": "remove by frequency"} -{"id": 38113, "token": "}_{(", "merges": "}_{ (", "count": 50, "type": "remove by frequency"} -{"id": 31550, "token": "\u00ce\u00bcl", "merges": "\u00ce\u00bc l", "count": 50, "type": "remove by frequency"} -{"id": 52395, "token": "\u00e6\u0143\u0130", "merges": "\u00e6\u0143 \u0130", "count": 50, "type": "remove by frequency"} -{"id": 38039, "token": ".\"_", "merges": ".\" _", "count": 50, "type": "remove by frequency"} -{"id": 20688, "token": "\u00ce\u00baB", "merges": "\u00ce\u00ba B", "count": 50, "type": "remove by frequency"} -{"id": 19018, "token": "\u0120Palestinian", "merges": "\u0120Palestin ian", "count": 51, "type": "remove by frequency"} -{"id": 34161, "token": "\u0120microglia", "merges": "\u0120microgl ia", "count": 51, "type": "remove by frequency"} -{"id": 48191, "token": "\u0120Malaysian", "merges": "\u0120Malays ian", "count": 51, "type": "remove by frequency"} -{"id": 43290, "token": "\u0120Packers", "merges": "\u0120Pack ers", "count": 51, "type": "remove by frequency"} -{"id": 37677, "token": "\u0120\u00ce\u00b1\u00cf\u0127\u00cf\u0126", "merges": "\u0120\u00ce\u00b1 \u00cf\u0127\u00cf\u0126", "count": 51, "type": "remove by frequency"} -{"id": 32233, "token": "\u0120mTOR", "merges": "\u0120m TOR", "count": 51, "type": "remove by frequency"} -{"id": 37578, "token": "^+\\", "merges": "^ +\\", "count": 51, "type": "remove by frequency"} -{"id": 31547, "token": "_->", "merges": "_ ->", "count": 51, "type": "remove by frequency"} -{"id": 33714, "token": "}\\!", "merges": "}\\ !", "count": 51, "type": "remove by frequency"} -{"id": 23560, "token": "\u0120Bankruptcy", "merges": "\u0120Bank ruptcy", "count": 52, "type": "remove by frequency"} -{"id": 48652, "token": "\u0120mistrial", "merges": "\u0120mist rial", "count": 52, "type": "remove by frequency"} -{"id": 39030, "token": "\u0120Midwest", "merges": "\u0120Mid west", "count": 52, "type": "remove by frequency"} -{"id": 34984, "token": "\u0120ARISING", "merges": "\u0120AR ISING", "count": 52, "type": "remove by frequency"} -{"id": 34875, "token": "\u0120mammary", "merges": "\u0120mam mary", "count": 52, "type": "remove by frequency"} -{"id": 27056, "token": "varrho", "merges": "var rho", "count": 52, "type": "remove by frequency"} -{"id": 46865, "token": "),\\\\", "merges": "), \\\\", "count": 52, "type": "remove by frequency"} -{"id": 14466, "token": "\u00e2\u0122\u00b2-", "merges": "\u00e2\u0122\u00b2 -", "count": 52, "type": "remove by frequency"} -{"id": 22372, "token": ",[@", "merges": ", [@", "count": 52, "type": "remove by frequency"} -{"id": 44369, "token": "ActivityThread", "merges": "Activity Thread", "count": 53, "type": "remove by frequency"} -{"id": 33867, "token": "\u0120Parlament", "merges": "\u0120Par lament", "count": 53, "type": "remove by frequency"} -{"id": 35363, "token": "\u0120Ethiopia", "merges": "\u0120Ethiop ia", "count": 53, "type": "remove by frequency"} -{"id": 40787, "token": "\u0120Yugoslav", "merges": "\u0120Yug oslav", "count": 53, "type": "remove by frequency"} -{"id": 40136, "token": "\u0120Cassie", "merges": "\u0120Cass ie", "count": 53, "type": "remove by frequency"} -{"id": 47234, "token": "taient", "merges": "ta ient", "count": 53, "type": "remove by frequency"} -{"id": 13960, "token": "simeq", "merges": "sime q", "count": 53, "type": "remove by frequency"} -{"id": 49159, "token": "\u0120Qur", "merges": "\u0120Q ur", "count": 53, "type": "remove by frequency"} -{"id": 34580, "token": "}.$", "merges": "}. $", "count": 53, "type": "remove by frequency"} -{"id": 50838, "token": "\u00e5\u012d\u00b3", "merges": "\u00e5\u012d \u00b3", "count": 53, "type": "remove by frequency"} -{"id": 49763, "token": "\u00e0\u00ab\u0129", "merges": "\u00e0\u00ab \u0129", "count": 53, "type": "remove by frequency"} -{"id": 46245, "token": "\u0120tumorigenesis", "merges": "\u0120tumorigen esis", "count": 54, "type": "remove by frequency"} -{"id": 43939, "token": "\u0120perturbative", "merges": "\u0120perturb ative", "count": 54, "type": "remove by frequency"} -{"id": 45914, "token": "\u0120Tanzania", "merges": "\u0120Tanz ania", "count": 54, "type": "remove by frequency"} -{"id": 32647, "token": "\u0120neurode", "merges": "\u0120neuro de", "count": 54, "type": "remove by frequency"} -{"id": 44117, "token": "\u0120Veteran", "merges": "\u0120Veter an", "count": 54, "type": "remove by frequency"} -{"id": 39086, "token": "\u0120agarose", "merges": "\u0120agar ose", "count": 54, "type": "remove by frequency"} -{"id": 42757, "token": "Sprintf", "merges": "S printf", "count": 54, "type": "remove by frequency"} -{"id": 39566, "token": "\u0120Ebola", "merges": "\u0120Eb ola", "count": 54, "type": "remove by frequency"} -{"id": 24448, "token": "^{(\\", "merges": "^{ (\\", "count": 54, "type": "remove by frequency"} -{"id": 31502, "token": "\u0120$^", "merges": "\u0120$ ^", "count": 54, "type": "remove by frequency"} -{"id": 46655, "token": "\\*,", "merges": "\\ *,", "count": 54, "type": "remove by frequency"} -{"id": 27863, "token": "\u0120cytotoxicity", "merges": "\u0120cytotox icity", "count": 55, "type": "remove by frequency"} -{"id": 32038, "token": "\u0120chemok", "merges": "\u0120chem ok", "count": 55, "type": "remove by frequency"} -{"id": 19592, "token": "\u0120Judges", "merges": "\u0120Jud ges", "count": 55, "type": "remove by frequency"} -{"id": 40906, "token": "\u0120Haiti", "merges": "\u0120Hait i", "count": 55, "type": "remove by frequency"} -{"id": 26038, "token": "=\"@+", "merges": "=\"@ +", "count": 55, "type": "remove by frequency"} -{"id": 47940, "token": "\u0120%>%", "merges": "\u0120%> %", "count": 55, "type": "remove by frequency"} -{"id": 48662, "token": "\u0120HFD", "merges": "\u0120H FD", "count": 55, "type": "remove by frequency"} -{"id": 49449, "token": "\u0120postoperatively", "merges": "\u0120post operatively", "count": 56, "type": "remove by frequency"} -{"id": 45291, "token": "\u0120Opposition", "merges": "\u0120Opp osition", "count": 56, "type": "remove by frequency"} -{"id": 48070, "token": "smallmatrix", "merges": "small matrix", "count": 56, "type": "remove by frequency"} -{"id": 45351, "token": "\u0120Reverend", "merges": "\u0120Reve rend", "count": 56, "type": "remove by frequency"} -{"id": 38736, "token": "databind", "merges": "datab ind", "count": 56, "type": "remove by frequency"} -{"id": 25487, "token": "\u0120Briefly", "merges": "\u0120Brief ly", "count": 56, "type": "remove by frequency"} -{"id": 37243, "token": "\u00e2\u0122\u00b2,", "merges": "\u00e2\u0122\u00b2 ,", "count": 56, "type": "remove by frequency"} -{"id": 38341, "token": "\u0120($(", "merges": "\u0120($ (", "count": 56, "type": "remove by frequency"} -{"id": 54328, "token": "\u00e9\u013b\u0140", "merges": "\u00e9\u013b \u0140", "count": 56, "type": "remove by frequency"} -{"id": 34846, "token": "\\}}", "merges": "\\ }}", "count": 56, "type": "remove by frequency"} -{"id": 14198, "token": "\u0120Palestin", "merges": "\u0120Palest in", "count": 57, "type": "remove by frequency"} -{"id": 34657, "token": "appellant", "merges": "app ellant", "count": 57, "type": "remove by frequency"} -{"id": 34745, "token": "\u0120LIABLE", "merges": "\u0120LI ABLE", "count": 57, "type": "remove by frequency"} -{"id": 53298, "token": "\u00e7\u00b5\u0125", "merges": "\u00e7\u00b5 \u0125", "count": 57, "type": "remove by frequency"} -{"id": 24638, "token": "^*\\", "merges": "^* \\", "count": 57, "type": "remove by frequency"} -{"id": 16643, "token": "\u00c2\u0127", "merges": "\u00c2 \u0127", "count": 57, "type": "remove by frequency"} -{"id": 50155, "token": "\u0120Parenthood", "merges": "\u0120Pa renthood", "count": 58, "type": "remove by frequency"} -{"id": 46777, "token": "\u0120falciparum", "merges": "\u0120fal ciparum", "count": 58, "type": "remove by frequency"} -{"id": 39447, "token": "\u0120Scripture", "merges": "\u0120Script ure", "count": 58, "type": "remove by frequency"} -{"id": 42591, "token": "\u0120Judiciary", "merges": "\u0120Jud iciary", "count": 58, "type": "remove by frequency"} -{"id": 49560, "token": "\u00c3\u00a4sident", "merges": "\u00c3\u00a4s ident", "count": 58, "type": "remove by frequency"} -{"id": 48114, "token": "\u0120myosin", "merges": "\u0120my osin", "count": 58, "type": "remove by frequency"} -{"id": 30401, "token": "\u0120Canad", "merges": "\u0120Can ad", "count": 58, "type": "remove by frequency"} -{"id": 44418, "token": "))$,", "merges": ") )$,", "count": 58, "type": "remove by frequency"} -{"id": 29270, "token": "}}=\\", "merges": "}} =\\", "count": 58, "type": "remove by frequency"} -{"id": 29119, "token": ")}$,", "merges": ") }$,", "count": 58, "type": "remove by frequency"} -{"id": 35911, "token": "\u0120\\^", "merges": "\u0120\\ ^", "count": 58, "type": "remove by frequency"} -{"id": 37040, "token": "\u0120neutrophil", "merges": "\u0120neutroph il", "count": 59, "type": "remove by frequency"} -{"id": 8865, "token": "idemargin", "merges": "idem argin", "count": 59, "type": "remove by frequency"} -{"id": 20003, "token": "\u0120Germans", "merges": "\u0120Germ ans", "count": 59, "type": "remove by frequency"} -{"id": 48069, "token": "\u0120Serbian", "merges": "\u0120Ser bian", "count": 59, "type": "remove by frequency"} -{"id": 41974, "token": "\u0120f\u00c3\u00b6rs", "merges": "\u0120f\u00c3\u00b6 rs", "count": 59, "type": "remove by frequency"} -{"id": 46538, "token": ")}^{", "merges": ") }^{", "count": 59, "type": "remove by frequency"} -{"id": 39478, "token": "}$;", "merges": "}$ ;", "count": 59, "type": "remove by frequency"} -{"id": 38540, "token": "\u0120Confederate", "merges": "\u0120Confed erate", "count": 60, "type": "remove by frequency"} -{"id": 43133, "token": "\u0120Researchers", "merges": "\u0120Res earchers", "count": 60, "type": "remove by frequency"} -{"id": 38513, "token": "\u0120polyclonal", "merges": "\u0120poly clonal", "count": 60, "type": "remove by frequency"} -{"id": 40073, "token": "\u0120Biosystems", "merges": "\u0120Bios ystems", "count": 60, "type": "remove by frequency"} -{"id": 32242, "token": "\u0120immunore", "merges": "\u0120immun ore", "count": 60, "type": "remove by frequency"} -{"id": 25700, "token": "\u0120Medicaid", "merges": "\u0120Med icaid", "count": 60, "type": "remove by frequency"} -{"id": 46544, "token": "\u0120Spani", "merges": "\u0120Sp ani", "count": 60, "type": "remove by frequency"} -{"id": 49548, "token": "\u0120Judah", "merges": "\u0120Jud ah", "count": 60, "type": "remove by frequency"} -{"id": 16052, "token": "}}^{\\", "merges": "}} ^{\\", "count": 60, "type": "remove by frequency"} -{"id": 39885, "token": "$^{\\", "merges": "$ ^{\\", "count": 60, "type": "remove by frequency"} -{"id": 51600, "token": "\u00e5\u00bc\u0136", "merges": "\u00e5\u00bc \u0136", "count": 60, "type": "remove by frequency"} -{"id": 53302, "token": "\u00e7\u00b6\u0133", "merges": "\u00e7\u00b6 \u0133", "count": 60, "type": "remove by frequency"} -{"id": 26130, "token": "\u00ce\u00bcM", "merges": "\u00ce\u00bc M", "count": 60, "type": "remove by frequency"} -{"id": 29650, "token": "_{[", "merges": "_{ [", "count": 60, "type": "remove by frequency"} -{"id": 45053, "token": "\u0120Edmonton", "merges": "\u0120Ed monton", "count": 61, "type": "remove by frequency"} -{"id": 40987, "token": "\u0120DAPI", "merges": "\u0120D API", "count": 61, "type": "remove by frequency"} -{"id": 54599, "token": "\u00ef\u00bd\u00af", "merges": "\u00ef\u00bd \u00af", "count": 61, "type": "remove by frequency"} -{"id": 47106, "token": "^+_", "merges": "^+ _", "count": 61, "type": "remove by frequency"} -{"id": 42979, "token": "\u0120Quantification", "merges": "\u0120Quant ification", "count": 62, "type": "remove by frequency"} -{"id": 49500, "token": "\u0120Saskatchewan", "merges": "\u0120Sask atchewan", "count": 62, "type": "remove by frequency"} -{"id": 32635, "token": "\u0120Minneapolis", "merges": "\u0120Min neapolis", "count": 62, "type": "remove by frequency"} -{"id": 44686, "token": "\u0120neoplastic", "merges": "\u0120ne oplastic", "count": 62, "type": "remove by frequency"} -{"id": 46406, "token": "\u0120Dominican", "merges": "\u0120Domin ican", "count": 62, "type": "remove by frequency"} -{"id": 25900, "token": "\u0120IMPLIED", "merges": "\u0120IM PLIED", "count": 62, "type": "remove by frequency"} -{"id": 40145, "token": "\u0120Ecuador", "merges": "\u0120E cuador", "count": 62, "type": "remove by frequency"} -{"id": 20533, "token": "\u0120Iranian", "merges": "\u0120Iran ian", "count": 62, "type": "remove by frequency"} -{"id": 30356, "token": "marined", "merges": "mar ined", "count": 62, "type": "remove by frequency"} -{"id": 54596, "token": "\u00ef\u00bd\u00a3", "merges": "\u00ef\u00bd \u00a3", "count": 62, "type": "remove by frequency"} -{"id": 25942, "token": ":=\\", "merges": ": =\\", "count": 62, "type": "remove by frequency"} -{"id": 42566, "token": "\u0120osteoclast", "merges": "\u0120oste oclast", "count": 63, "type": "remove by frequency"} -{"id": 30884, "token": "underset", "merges": "unders et", "count": 63, "type": "remove by frequency"} -{"id": 44188, "token": "supset", "merges": "sup set", "count": 63, "type": "remove by frequency"} -{"id": 34033, "token": "\u0120mRNAs", "merges": "\u0120mRNA s", "count": 63, "type": "remove by frequency"} -{"id": 42666, "token": "\u0120d\u00c3\u00a4r", "merges": "\u0120d \u00c3\u00a4r", "count": 63, "type": "remove by frequency"} -{"id": 16996, "token": "}})$", "merges": "}} )$", "count": 63, "type": "remove by frequency"} -{"id": 35763, "token": "\u00ce\u00bcL", "merges": "\u00ce\u00bc L", "count": 63, "type": "remove by frequency"} -{"id": 53296, "token": "\u00e7\u00b4\u00ae", "merges": "\u00e7\u00b4 \u00ae", "count": 63, "type": "remove by frequency"} -{"id": 14522, "token": "\u0120Interestingly", "merges": "\u0120Interest ingly", "count": 64, "type": "remove by frequency"} -{"id": 48376, "token": "\u0120transgene", "merges": "\u0120trans gene", "count": 64, "type": "remove by frequency"} -{"id": 40269, "token": "\u0120Vikings", "merges": "\u0120Vik ings", "count": 64, "type": "remove by frequency"} -{"id": 34915, "token": "\u0120TORT", "merges": "\u0120T ORT", "count": 64, "type": "remove by frequency"} -{"id": 44272, "token": "\u0120Cubs", "merges": "\u0120Cub s", "count": 64, "type": "remove by frequency"} -{"id": 46433, "token": ")$;", "merges": ")$ ;", "count": 64, "type": "remove by frequency"} -{"id": 23794, "token": "\u00c2\u0132", "merges": "\u00c2 \u0132", "count": 64, "type": "remove by frequency"} -{"id": 37584, "token": "\u0120comorbidities", "merges": "\u0120comorbid ities", "count": 65, "type": "remove by frequency"} -{"id": 48847, "token": "\u0120Socorro", "merges": "\u0120Soc orro", "count": 65, "type": "remove by frequency"} -{"id": 32024, "token": "\u0120DAMAGES", "merges": "\u0120DAM AGES", "count": 65, "type": "remove by frequency"} -{"id": 28711, "token": "}^{*", "merges": "}^{ *", "count": 65, "type": "remove by frequency"} -{"id": 44106, "token": "at\u00c4\u0125", "merges": "at \u00c4\u0125", "count": 65, "type": "remove by frequency"} -{"id": 48653, "token": "\u0120Sgt", "merges": "\u0120S gt", "count": 65, "type": "remove by frequency"} -{"id": 42696, "token": ",\\[", "merges": ",\\ [", "count": 65, "type": "remove by frequency"} -{"id": 44391, "token": "#{$", "merges": "# {$", "count": 65, "type": "remove by frequency"} -{"id": 18637, "token": "longrightarrow", "merges": "long rightarrow", "count": 66, "type": "remove by frequency"} -{"id": 35032, "token": "\u0120xenograft", "merges": "\u0120xen ograft", "count": 66, "type": "remove by frequency"} -{"id": 40863, "token": "\u0120Cowboys", "merges": "\u0120Cow boys", "count": 66, "type": "remove by frequency"} -{"id": 35522, "token": "\u0120Uganda", "merges": "\u0120Ug anda", "count": 66, "type": "remove by frequency"} -{"id": 32377, "token": "\u0120Enron", "merges": "\u0120En ron", "count": 66, "type": "remove by frequency"} -{"id": 32453, "token": "}\\;", "merges": "}\\ ;", "count": 66, "type": "remove by frequency"} -{"id": 33189, "token": "'$,", "merges": "' $,", "count": 66, "type": "remove by frequency"} -{"id": 34575, "token": "\u012000000000000000000000000000000000", "merges": "\u0120 00000000000000000000000000000000", "count": 67, "type": "remove by frequency"} -{"id": 30445, "token": "\u0120Certainly", "merges": "\u0120C ertainly", "count": 67, "type": "remove by frequency"} -{"id": 40767, "token": "\u0120Brisbane", "merges": "\u0120Bris bane", "count": 67, "type": "remove by frequency"} -{"id": 33366, "token": "\u0120Commiss", "merges": "\u0120Com miss", "count": 67, "type": "remove by frequency"} -{"id": 44092, "token": "\u0120Denote", "merges": "\u0120Den ote", "count": 67, "type": "remove by frequency"} -{"id": 43664, "token": "\u0120Lemmon", "merges": "\u0120Lem mon", "count": 67, "type": "remove by frequency"} -{"id": 20886, "token": "\u0120({\\", "merges": "\u0120( {\\", "count": 67, "type": "remove by frequency"} -{"id": 48323, "token": "\u0120\u00c3\u0124", "merges": "\u0120\u00c3 \u0124", "count": 67, "type": "remove by frequency"} -{"id": 24060, "token": "&=\\", "merges": "& =\\", "count": 67, "type": "remove by frequency"} -{"id": 34476, "token": "![(", "merges": "![ (", "count": 67, "type": "remove by frequency"} -{"id": 52786, "token": "\u00e7\u012b\u00b4", "merges": "\u00e7\u012b \u00b4", "count": 67, "type": "remove by frequency"} -{"id": 45692, "token": "\u0120heterosexual", "merges": "\u0120heter osexual", "count": 68, "type": "remove by frequency"} -{"id": 26951, "token": "\u0120supernatant", "merges": "\u0120supernat ant", "count": 68, "type": "remove by frequency"} -{"id": 27255, "token": "\u0120LIABILITY", "merges": "\u0120LI ABILITY", "count": 68, "type": "remove by frequency"} -{"id": 29649, "token": "\u0120monocytes", "merges": "\u0120mon ocytes", "count": 68, "type": "remove by frequency"} -{"id": 33668, "token": "\u0120Patriots", "merges": "\u0120Patri ots", "count": 68, "type": "remove by frequency"} -{"id": 40585, "token": "\u0120Pelosi", "merges": "\u0120Pel osi", "count": 68, "type": "remove by frequency"} -{"id": 40327, "token": "\u0120Celtic", "merges": "\u0120Celt ic", "count": 68, "type": "remove by frequency"} -{"id": 34275, "token": "\u0120Mormon", "merges": "\u0120Morm on", "count": 68, "type": "remove by frequency"} -{"id": 47719, "token": "\u00e2\u0124\u00ac\u00e2\u0126\u00a2", "merges": "\u00e2\u0124\u00ac \u00e2\u0126\u00a2", "count": 68, "type": "remove by frequency"} -{"id": 24076, "token": "\u0120Gaza", "merges": "\u0120G aza", "count": 68, "type": "remove by frequency"} -{"id": 38833, "token": "\u0120\u00ce\u00b8\u00ce\u00b1", "merges": "\u0120\u00ce\u00b8 \u00ce\u00b1", "count": 68, "type": "remove by frequency"} -{"id": 30485, "token": "\u0120}{\\", "merges": "\u0120} {\\", "count": 68, "type": "remove by frequency"} -{"id": 29432, "token": "*.,", "merges": "* .,", "count": 68, "type": "remove by frequency"} -{"id": 21824, "token": "\u00c2\u0131", "merges": "\u00c2 \u0131", "count": 68, "type": "remove by frequency"} -{"id": 37486, "token": "\u0120tumorigen", "merges": "\u0120tumor igen", "count": 69, "type": "remove by frequency"} -{"id": 47607, "token": "\u0120\u00cf\u0125\u00cf\u0126\u00ce\u00b7\u00ce\u00bd", "merges": "\u0120\u00cf\u0125\u00cf\u0126\u00ce\u00b7 \u00ce\u00bd", "count": 69, "type": "remove by frequency"} -{"id": 41560, "token": "\u0120n\u00c3\u00a4r", "merges": "\u0120n \u00c3\u00a4r", "count": 69, "type": "remove by frequency"} -{"id": 33996, "token": "varpi", "merges": "var pi", "count": 69, "type": "remove by frequency"} -{"id": 18381, "token": "pcbi", "merges": "pc bi", "count": 69, "type": "remove by frequency"} -{"id": 50292, "token": "\u00e1\u0125\u00a6", "merges": "\u00e1\u0125 \u00a6", "count": 69, "type": "remove by frequency"} -{"id": 42625, "token": "Interestingly", "merges": "Interest ingly", "count": 70, "type": "remove by frequency"} -{"id": 48093, "token": "\u0120phosphatidyl", "merges": "\u0120phosphat idyl", "count": 70, "type": "remove by frequency"} -{"id": 40571, "token": "\u0120Biosciences", "merges": "\u0120Biosc iences", "count": 70, "type": "remove by frequency"} -{"id": 24883, "token": "\u0120Nonetheless", "merges": "\u0120Non etheless", "count": 70, "type": "remove by frequency"} -{"id": 35416, "token": "\u0120Kashmir", "merges": "\u0120Kash mir", "count": 70, "type": "remove by frequency"} -{"id": 10609, "token": "mathscr", "merges": "math scr", "count": 70, "type": "remove by frequency"} -{"id": 46562, "token": "\u0120Carls", "merges": "\u0120Car ls", "count": 70, "type": "remove by frequency"} -{"id": 43189, "token": ")[@", "merges": ") [@", "count": 70, "type": "remove by frequency"} -{"id": 42714, "token": "}$:", "merges": "}$ :", "count": 70, "type": "remove by frequency"} -{"id": 47658, "token": "\u0120dopaminergic", "merges": "\u0120dop aminergic", "count": 71, "type": "remove by frequency"} -{"id": 21485, "token": "\u0120Respondent", "merges": "\u0120Respond ent", "count": 71, "type": "remove by frequency"} -{"id": 50013, "token": "\u0120Arlington", "merges": "\u0120Ar lington", "count": 71, "type": "remove by frequency"} -{"id": 20637, "token": "\u0120supernat", "merges": "\u0120supern at", "count": 71, "type": "remove by frequency"} -{"id": 40261, "token": "\u0120Scandin", "merges": "\u0120Sc andin", "count": 71, "type": "remove by frequency"} -{"id": 20271, "token": "\u0120siRNA", "merges": "\u0120si RNA", "count": 71, "type": "remove by frequency"} -{"id": 43255, "token": "rceil", "merges": "r ceil", "count": 71, "type": "remove by frequency"} -{"id": 49170, "token": "\u00e0\u00aa\u00be", "merges": "\u00e0\u00aa \u00be", "count": 71, "type": "remove by frequency"} -{"id": 47624, "token": "\u0120Northwestern", "merges": "\u0120North western", "count": 72, "type": "remove by frequency"} -{"id": 44174, "token": "\u0120DISCLAIM", "merges": "\u0120DIS CLAIM", "count": 72, "type": "remove by frequency"} -{"id": 42862, "token": "\u0120Croatia", "merges": "\u0120Croat ia", "count": 72, "type": "remove by frequency"} -{"id": 47506, "token": "\u0120Dodgers", "merges": "\u0120Dod gers", "count": 72, "type": "remove by frequency"} -{"id": 48863, "token": "\u0120Kosovo", "merges": "\u0120Kos ovo", "count": 72, "type": "remove by frequency"} -{"id": 41164, "token": "\u0120detta", "merges": "\u0120det ta", "count": 72, "type": "remove by frequency"} -{"id": 42914, "token": "lceil", "merges": "l ceil", "count": 72, "type": "remove by frequency"} -{"id": 49437, "token": "\u00e1\u00bf\u00b6\u00ce\u00bd", "merges": "\u00e1\u00bf\u00b6 \u00ce\u00bd", "count": 72, "type": "remove by frequency"} -{"id": 4622, "token": "*]{}", "merges": "* ]{}", "count": 72, "type": "remove by frequency"} -{"id": 42875, "token": "\u0120AKT", "merges": "\u0120AK T", "count": 72, "type": "remove by frequency"} -{"id": 50325, "token": "\u00e2\u012a\u00bd", "merges": "\u00e2\u012a \u00bd", "count": 72, "type": "remove by frequency"} -{"id": 22015, "token": "\u00c2\u012f", "merges": "\u00c2 \u012f", "count": 72, "type": "remove by frequency"} -{"id": 20054, "token": "\u00c2\u013c", "merges": "\u00c2 \u013c", "count": 72, "type": "remove by frequency"} -{"id": 46965, "token": "\u0120antifer", "merges": "\u0120ant ifer", "count": 73, "type": "remove by frequency"} -{"id": 31874, "token": "\u0120Somal", "merges": "\u0120S omal", "count": 73, "type": "remove by frequency"} -{"id": 22254, "token": "}}}_{", "merges": "}} }_{", "count": 73, "type": "remove by frequency"} -{"id": 48727, "token": "\u0120USSR", "merges": "\u0120US SR", "count": 73, "type": "remove by frequency"} -{"id": 40360, "token": "}$),", "merges": "}$ ),", "count": 73, "type": "remove by frequency"} -{"id": 49938, "token": "\u0120$+", "merges": "\u0120$ +", "count": 73, "type": "remove by frequency"} -{"id": 41829, "token": "\u00c2\u00b5g", "merges": "\u00c2\u00b5 g", "count": 73, "type": "remove by frequency"} -{"id": 50009, "token": "\u0120Cherokee", "merges": "\u0120Che rokee", "count": 74, "type": "remove by frequency"} -{"id": 13389, "token": "\u0120Israeli", "merges": "\u0120Israel i", "count": 74, "type": "remove by frequency"} -{"id": 49260, "token": "\u0120Andr\u00c3\u00a9", "merges": "\u0120And r\u00c3\u00a9", "count": 74, "type": "remove by frequency"} -{"id": 47563, "token": "\u0120Turks", "merges": "\u0120Tur ks", "count": 74, "type": "remove by frequency"} -{"id": 46145, "token": "<>();", "merges": "< >();", "count": 74, "type": "remove by frequency"} -{"id": 42569, "token": "\u0120Jeg", "merges": "\u0120J eg", "count": 74, "type": "remove by frequency"} -{"id": 31704, "token": "\u0120Wnt", "merges": "\u0120W nt", "count": 74, "type": "remove by frequency"} -{"id": 27860, "token": "_{+", "merges": "_{ +", "count": 74, "type": "remove by frequency"} -{"id": 53547, "token": "\u00e8\u0126\u00a9", "merges": "\u00e8\u0126 \u00a9", "count": 74, "type": "remove by frequency"} -{"id": 19905, "token": "\u0120neutroph", "merges": "\u0120neut roph", "count": 75, "type": "remove by frequency"} -{"id": 48022, "token": "\u0120Plymouth", "merges": "\u0120Ply mouth", "count": 75, "type": "remove by frequency"} -{"id": 44736, "token": "\u0120Anglic", "merges": "\u0120Ang lic", "count": 75, "type": "remove by frequency"} -{"id": 46614, "token": "\u0120Kuwait", "merges": "\u0120Ku wait", "count": 75, "type": "remove by frequency"} -{"id": 35889, "token": "\u0120Tehran", "merges": "\u0120Teh ran", "count": 75, "type": "remove by frequency"} -{"id": 49308, "token": "\u0120Golgi", "merges": "\u0120Gol gi", "count": 75, "type": "remove by frequency"} -{"id": 34353, "token": "-\\-\\", "merges": "-\\ -\\", "count": 75, "type": "remove by frequency"} -{"id": 48885, "token": ")\u00e2\u012a\u0134", "merges": ") \u00e2\u012a\u0134", "count": 75, "type": "remove by frequency"} -{"id": 27927, "token": "}})\\", "merges": "}} )\\", "count": 75, "type": "remove by frequency"} -{"id": 7433, "token": "^+^", "merges": "^+ ^", "count": 75, "type": "remove by frequency"} -{"id": 54595, "token": "\u00ef\u00bd\u00a2", "merges": "\u00ef\u00bd \u00a2", "count": 75, "type": "remove by frequency"} -{"id": 43506, "token": "\u0120Judaism", "merges": "\u0120Juda ism", "count": 76, "type": "remove by frequency"} -{"id": 36582, "token": "\u0120Wyoming", "merges": "\u0120Wy oming", "count": 76, "type": "remove by frequency"} -{"id": 43372, "token": "\u0120Sloven", "merges": "\u0120Sl oven", "count": 76, "type": "remove by frequency"} -{"id": 48875, "token": "\u0120chemot", "merges": "\u0120chem ot", "count": 76, "type": "remove by frequency"} -{"id": 8822, "token": "amssymb", "merges": "amss ymb", "count": 76, "type": "remove by frequency"} -{"id": 42967, "token": "\u0120Borel", "merges": "\u0120B orel", "count": 76, "type": "remove by frequency"} -{"id": 44600, "token": "\u0120Athen", "merges": "\u0120A then", "count": 76, "type": "remove by frequency"} -{"id": 44763, "token": "\u0120mononuclear", "merges": "\u0120mon onuclear", "count": 77, "type": "remove by frequency"} -{"id": 23135, "token": "Accordingly", "merges": "According ly", "count": 77, "type": "remove by frequency"} -{"id": 28994, "token": "\u0120Presidente", "merges": "\u0120President e", "count": 77, "type": "remove by frequency"} -{"id": 46857, "token": "\u0120Poincar\u00c3\u00a9", "merges": "\u0120Poinc ar\u00c3\u00a9", "count": 77, "type": "remove by frequency"} -{"id": 26832, "token": "\u0120microgl", "merges": "\u0120micro gl", "count": 77, "type": "remove by frequency"} -{"id": 49212, "token": "\u0120THEORY", "merges": "\u0120THE ORY", "count": 77, "type": "remove by frequency"} -{"id": 22736, "token": "\u0120Debtor", "merges": "\u0120Debt or", "count": 77, "type": "remove by frequency"} -{"id": 49428, "token": "\u0120serop", "merges": "\u0120ser op", "count": 77, "type": "remove by frequency"} -{"id": 28543, "token": "\u0120$(\"#", "merges": "\u0120$ (\"#", "count": 77, "type": "remove by frequency"} -{"id": 43839, "token": "\u0120Hartford", "merges": "\u0120Hart ford", "count": 78, "type": "remove by frequency"} -{"id": 47419, "token": "\u0120Damascus", "merges": "\u0120Dam ascus", "count": 78, "type": "remove by frequency"} -{"id": 47909, "token": "\u0120\u00cf\u0125\u00cf\u0127\u00ce\u00bd", "merges": "\u0120\u00cf\u0125\u00cf\u0127 \u00ce\u00bd", "count": 78, "type": "remove by frequency"} -{"id": 37876, "token": "\u0120moeten", "merges": "\u0120mo eten", "count": 78, "type": "remove by frequency"} -{"id": 37425, "token": "ENRON", "merges": "EN RON", "count": 78, "type": "remove by frequency"} -{"id": 48745, "token": "\u0120ATCC", "merges": "\u0120AT CC", "count": 78, "type": "remove by frequency"} -{"id": 19289, "token": "}_{-", "merges": "}_{ -", "count": 78, "type": "remove by frequency"} -{"id": 49286, "token": "\u0120Esq", "merges": "\u0120Es q", "count": 78, "type": "remove by frequency"} -{"id": 41663, "token": "\u00d1\u012c\u00d1\u0122", "merges": "\u00d1\u012c \u00d1\u0122", "count": 78, "type": "remove by frequency"} -{"id": 33070, "token": "\u0120Indianapolis", "merges": "\u0120Indian apolis", "count": 79, "type": "remove by frequency"} -{"id": 24083, "token": "\u0120Subsequently", "merges": "\u0120Sub sequently", "count": 79, "type": "remove by frequency"} -{"id": 46560, "token": "\u0120Guatemala", "merges": "\u0120Guatem ala", "count": 79, "type": "remove by frequency"} -{"id": 36111, "token": "\u0120OPINION", "merges": "\u0120OP INION", "count": 79, "type": "remove by frequency"} -{"id": 46365, "token": "\u0120Whilst", "merges": "\u0120Wh ilst", "count": 79, "type": "remove by frequency"} -{"id": 42605, "token": "\u0120Banach", "merges": "\u0120Ban ach", "count": 79, "type": "remove by frequency"} -{"id": 34280, "token": "\u0120Calif", "merges": "\u0120Cal if", "count": 79, "type": "remove by frequency"} -{"id": 48812, "token": "\u0120NRA", "merges": "\u0120N RA", "count": 79, "type": "remove by frequency"} -{"id": 48534, "token": "\u0120Syracuse", "merges": "\u0120Syrac use", "count": 80, "type": "remove by frequency"} -{"id": 49290, "token": "\u0120M\u00c3\u00bcller", "merges": "\u0120M \u00c3\u00bcller", "count": 80, "type": "remove by frequency"} -{"id": 40983, "token": "\u0120Morocco", "merges": "\u0120Mor occo", "count": 80, "type": "remove by frequency"} -{"id": 44937, "token": "\u0120Perth", "merges": "\u0120Per th", "count": 80, "type": "remove by frequency"} -{"id": 43276, "token": "\u0120Clint", "merges": "\u0120Cl int", "count": 80, "type": "remove by frequency"} -{"id": 37450, "token": "}}}^{", "merges": "}} }^{", "count": 80, "type": "remove by frequency"} -{"id": 31530, "token": "\u0120hogy", "merges": "\u0120h ogy", "count": 80, "type": "remove by frequency"} -{"id": 49370, "token": "\u0120Treg", "merges": "\u0120T reg", "count": 80, "type": "remove by frequency"} -{"id": 3455, "token": "]{},", "merges": "]{} ,", "count": 80, "type": "remove by frequency"} -{"id": 34027, "token": "\u0120$$(", "merges": "\u0120$$ (", "count": 80, "type": "remove by frequency"} -{"id": 26997, "token": "\\^[", "merges": "\\^ [", "count": 80, "type": "remove by frequency"} -{"id": 34399, "token": "\u0120Suddenly", "merges": "\u0120Sud denly", "count": 81, "type": "remove by frequency"} -{"id": 21618, "token": "\u0120Arkansas", "merges": "\u0120Ark ansas", "count": 81, "type": "remove by frequency"} -{"id": 37252, "token": "\u0120Norfolk", "merges": "\u0120Nor folk", "count": 81, "type": "remove by frequency"} -{"id": 45277, "token": "\u0120Trudeau", "merges": "\u0120Tr udeau", "count": 81, "type": "remove by frequency"} -{"id": 33891, "token": "\u0120Yankees", "merges": "\u0120Yan kees", "count": 81, "type": "remove by frequency"} -{"id": 38372, "token": "\u0120Qatar", "merges": "\u0120Q atar", "count": 81, "type": "remove by frequency"} -{"id": 32330, "token": "\u0120NLR", "merges": "\u0120N LR", "count": 81, "type": "remove by frequency"} -{"id": 53435, "token": "\u00e7\u00be\u00a8", "merges": "\u00e7\u00be \u00a8", "count": 81, "type": "remove by frequency"} -{"id": 35055, "token": "\u0120immunofluorescence", "merges": "\u0120immun ofluorescence", "count": 82, "type": "remove by frequency"} -{"id": 47518, "token": "\u0120Affordable", "merges": "\u0120Aff ordable", "count": 82, "type": "remove by frequency"} -{"id": 31914, "token": "\u0120Milwaukee", "merges": "\u0120Mil waukee", "count": 82, "type": "remove by frequency"} -{"id": 16667, "token": "Appellant", "merges": "App ellant", "count": 82, "type": "remove by frequency"} -{"id": 45951, "token": "Inflater", "merges": "Infl ater", "count": 82, "type": "remove by frequency"} -{"id": 42973, "token": "\u0120Nate", "merges": "\u0120N ate", "count": 82, "type": "remove by frequency"} -{"id": 28052, "token": "\u0120\\[[", "merges": "\u0120\\[ [", "count": 82, "type": "remove by frequency"} -{"id": 39779, "token": "}=-", "merges": "}= -", "count": 82, "type": "remove by frequency"} -{"id": 23930, "token": "\u0120autophagy", "merges": "\u0120aut ophagy", "count": 83, "type": "remove by frequency"} -{"id": 48401, "token": "\u0120Isaiah", "merges": "\u0120Isa iah", "count": 83, "type": "remove by frequency"} -{"id": 46578, "token": "\u0120Syrac", "merges": "\u0120Sy rac", "count": 83, "type": "remove by frequency"} -{"id": 39379, "token": "\u0120Asst", "merges": "\u0120As st", "count": 83, "type": "remove by frequency"} -{"id": 29591, "token": ")}(\\", "merges": ")} (\\", "count": 83, "type": "remove by frequency"} -{"id": 8381, "token": "\u00c2\u0124", "merges": "\u00c2 \u0124", "count": 83, "type": "remove by frequency"} -{"id": 50290, "token": "\u00cb\u00a2", "merges": "\u00cb \u00a2", "count": 83, "type": "remove by frequency"} -{"id": 15466, "token": "\u0120WARRANTIES", "merges": "\u0120WARRANT IES", "count": 84, "type": "remove by frequency"} -{"id": 42521, "token": "\u0120leukocytes", "merges": "\u0120leuk ocytes", "count": 84, "type": "remove by frequency"} -{"id": 37911, "token": "\u0120Cardinals", "merges": "\u0120Card inals", "count": 84, "type": "remove by frequency"} -{"id": 41198, "token": "\u0120Jamaica", "merges": "\u0120Jama ica", "count": 84, "type": "remove by frequency"} -{"id": 32558, "token": "nolimits", "merges": "nol imits", "count": 84, "type": "remove by frequency"} -{"id": 6274, "token": "nonumber", "merges": "non umber", "count": 84, "type": "remove by frequency"} -{"id": 17959, "token": "\u0120Syrian", "merges": "\u0120Sy rian", "count": 84, "type": "remove by frequency"} -{"id": 20999, "token": "mathop", "merges": "math op", "count": 84, "type": "remove by frequency"} -{"id": 40288, "token": "\u0120$('.", "merges": "\u0120$(' .", "count": 84, "type": "remove by frequency"} -{"id": 33002, "token": "\u0120PPAR", "merges": "\u0120P PAR", "count": 84, "type": "remove by frequency"} -{"id": 9014, "token": "\\]),", "merges": "\\] ),", "count": 84, "type": "remove by frequency"} -{"id": 18955, "token": "{{{\\", "merges": "{ {{\\", "count": 84, "type": "remove by frequency"} -{"id": 35235, "token": "\u0120Memorandum", "merges": "\u0120Mem orandum", "count": 85, "type": "remove by frequency"} -{"id": 49106, "token": "\u0120Snowden", "merges": "\u0120Snow den", "count": 85, "type": "remove by frequency"} -{"id": 34917, "token": "\u0120\u00ce\u00ba\u00ce\u00b1\u00e1\u00bd\u00b6", "merges": "\u0120\u00ce\u00ba\u00ce\u00b1 \u00e1\u00bd\u00b6", "count": 85, "type": "remove by frequency"} -{"id": 27751, "token": "\u0120Vermont", "merges": "\u0120Verm ont", "count": 85, "type": "remove by frequency"} -{"id": 36388, "token": "\u0120Nearly", "merges": "\u0120N early", "count": 85, "type": "remove by frequency"} -{"id": 48386, "token": "\u0120Venet", "merges": "\u0120Ven et", "count": 85, "type": "remove by frequency"} -{"id": 43168, "token": "\u0120brane", "merges": "\u0120br ane", "count": 85, "type": "remove by frequency"} -{"id": 45079, "token": "\u0120ovat", "merges": "\u0120ov at", "count": 85, "type": "remove by frequency"} -{"id": 27852, "token": "\u0120(\u00c2\u00b1", "merges": "\u0120( \u00c2\u00b1", "count": 85, "type": "remove by frequency"} -{"id": 27628, "token": "\u0120Importantly", "merges": "\u0120Import antly", "count": 86, "type": "remove by frequency"} -{"id": 36405, "token": "\u0120Sacramento", "merges": "\u0120Sac ramento", "count": 86, "type": "remove by frequency"} -{"id": 38641, "token": "\u0120cyclin", "merges": "\u0120cycl in", "count": 86, "type": "remove by frequency"} -{"id": 49970, "token": "\u0120Quran", "merges": "\u0120Q uran", "count": 86, "type": "remove by frequency"} -{"id": 50180, "token": "\u0120Omaha", "merges": "\u0120O maha", "count": 86, "type": "remove by frequency"} -{"id": 45657, "token": "underbrace", "merges": "under brace", "count": 87, "type": "remove by frequency"} -{"id": 17318, "token": "smallcaps", "merges": "small caps", "count": 87, "type": "remove by frequency"} -{"id": 48610, "token": "\u0120Sanskrit", "merges": "\u0120Sans krit", "count": 87, "type": "remove by frequency"} -{"id": 27637, "token": "}}}{\\", "merges": "}} }{\\", "count": 87, "type": "remove by frequency"} -{"id": 49217, "token": "\u0120inducible", "merges": "\u0120indu cible", "count": 88, "type": "remove by frequency"} -{"id": 48981, "token": "\u0120autosomal", "merges": "\u0120aut osomal", "count": 88, "type": "remove by frequency"} -{"id": 41984, "token": "\u0120Cambodia", "merges": "\u0120Camb odia", "count": 88, "type": "remove by frequency"} -{"id": 40104, "token": "\u0120Connie", "merges": "\u0120Con nie", "count": 88, "type": "remove by frequency"} -{"id": 34905, "token": "\u0120Ghana", "merges": "\u0120Gh ana", "count": 88, "type": "remove by frequency"} -{"id": 37664, "token": "\u0120Particularly", "merges": "\u0120Part icularly", "count": 89, "type": "remove by frequency"} -{"id": 43424, "token": "\u0120Socrates", "merges": "\u0120S ocrates", "count": 89, "type": "remove by frequency"} -{"id": 40346, "token": "\u0120Guatem", "merges": "\u0120Gu atem", "count": 89, "type": "remove by frequency"} -{"id": 19446, "token": "})$,", "merges": "} )$,", "count": 89, "type": "remove by frequency"} -{"id": 43182, "token": "\u00e1\u00bd\u0132", "merges": "\u00e1\u00bd \u0132", "count": 89, "type": "remove by frequency"} -{"id": 40188, "token": ")}^", "merges": ") }^", "count": 89, "type": "remove by frequency"} -{"id": 44772, "token": "\u0120Strickland", "merges": "\u0120Strick land", "count": 90, "type": "remove by frequency"} -{"id": 38968, "token": "\u0120phospho", "merges": "\u0120phosph o", "count": 90, "type": "remove by frequency"} -{"id": 38463, "token": "\u0120Abor", "merges": "\u0120A bor", "count": 90, "type": "remove by frequency"} -{"id": 6263, "token": "_{{\\", "merges": "_{ {\\", "count": 90, "type": "remove by frequency"} -{"id": 41883, "token": "'}(", "merges": "' }(", "count": 90, "type": "remove by frequency"} -{"id": 15638, "token": "&=&", "merges": "&= &", "count": 90, "type": "remove by frequency"} -{"id": 39507, "token": "\u0120INTRODUCTION", "merges": "\u0120IN TRODUCTION", "count": 91, "type": "remove by frequency"} -{"id": 47129, "token": "\u0120Escherichia", "merges": "\u0120Esc herichia", "count": 91, "type": "remove by frequency"} -{"id": 47778, "token": "\u0120proteases", "merges": "\u0120prote ases", "count": 91, "type": "remove by frequency"} -{"id": 12750, "token": "setminus", "merges": "set minus", "count": 91, "type": "remove by frequency"} -{"id": 44359, "token": "})^{-", "merges": "}) ^{-", "count": 91, "type": "remove by frequency"} -{"id": 29864, "token": "\u0120DMSO", "merges": "\u0120D MSO", "count": 91, "type": "remove by frequency"} -{"id": 39115, "token": "},\\\\", "merges": "},\\ \\", "count": 91, "type": "remove by frequency"} -{"id": 40673, "token": "\u00e2\u012a\u00bc", "merges": "\u00e2\u012a \u00bc", "count": 91, "type": "remove by frequency"} -{"id": 34140, "token": "\u0120Holocaust", "merges": "\u0120Hol ocaust", "count": 92, "type": "remove by frequency"} -{"id": 44466, "token": "lifeless", "merges": "lif eless", "count": 92, "type": "remove by frequency"} -{"id": 34921, "token": "\u0120Surely", "merges": "\u0120Sure ly", "count": 92, "type": "remove by frequency"} -{"id": 29179, "token": "rbrack", "merges": "r brack", "count": 92, "type": "remove by frequency"} -{"id": 32738, "token": "\u0120Kurd", "merges": "\u0120K urd", "count": 92, "type": "remove by frequency"} -{"id": 26058, "token": "\u0120MPs", "merges": "\u0120M Ps", "count": 92, "type": "remove by frequency"} -{"id": 48542, "token": ")_\\", "merges": ") _\\", "count": 92, "type": "remove by frequency"} -{"id": 31232, "token": "\u0120phosphorylated", "merges": "\u0120phosphory lated", "count": 93, "type": "remove by frequency"} -{"id": 28103, "token": "\u0120polymorphisms", "merges": "\u0120polymorph isms", "count": 93, "type": "remove by frequency"} -{"id": 46657, "token": "\u0120Armenian", "merges": "\u0120Armen ian", "count": 93, "type": "remove by frequency"} -{"id": 27373, "token": "\u0120Oakland", "merges": "\u0120Oak land", "count": 93, "type": "remove by frequency"} -{"id": 37959, "token": "\u0120Serbia", "merges": "\u0120Ser bia", "count": 93, "type": "remove by frequency"} -{"id": 8827, "token": "rsfs", "merges": "rs fs", "count": 93, "type": "remove by frequency"} -{"id": 33211, "token": "\u00ce\u00bf\u00ce\u00bb", "merges": "\u00ce\u00bf \u00ce\u00bb", "count": 93, "type": "remove by frequency"} -{"id": 5539, "token": "ensuremath", "merges": "ensure math", "count": 94, "type": "remove by frequency"} -{"id": 28787, "token": "\u0120Hispanic", "merges": "\u0120His panic", "count": 94, "type": "remove by frequency"} -{"id": 48986, "token": "\u0120cytosol", "merges": "\u0120cytos ol", "count": 94, "type": "remove by frequency"} -{"id": 27247, "token": "\u0120Ethiop", "merges": "\u0120Eth iop", "count": 94, "type": "remove by frequency"} -{"id": 49119, "token": "\u0120Gideon", "merges": "\u0120G ideon", "count": 94, "type": "remove by frequency"} -{"id": 29477, "token": "\u0120hab\u00c3\u0143a", "merges": "\u0120hab \u00c3\u0143a", "count": 94, "type": "remove by frequency"} -{"id": 29580, "token": "\u0120Bernie", "merges": "\u0120Bern ie", "count": 94, "type": "remove by frequency"} -{"id": 38845, "token": "\u0120Sask", "merges": "\u0120S ask", "count": 94, "type": "remove by frequency"} -{"id": 48718, "token": "\u0120Nell", "merges": "\u0120N ell", "count": 94, "type": "remove by frequency"} -{"id": 40161, "token": "}}{(", "merges": "}}{ (", "count": 94, "type": "remove by frequency"} -{"id": 23505, "token": ".\\[", "merges": ". \\[", "count": 94, "type": "remove by frequency"} -{"id": 21167, "token": "\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142", "merges": "\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142 \u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142\u00c2\u0142", "count": 95, "type": "remove by frequency"} -{"id": 41782, "token": "\u0120contralateral", "merges": "\u0120contral ateral", "count": 95, "type": "remove by frequency"} -{"id": 45805, "token": "\u0120cytoskeleton", "merges": "\u0120cytos keleton", "count": 95, "type": "remove by frequency"} -{"id": 46662, "token": "\u0120microg", "merges": "\u0120micro g", "count": 95, "type": "remove by frequency"} -{"id": 21411, "token": "overset", "merges": "overs et", "count": 95, "type": "remove by frequency"} -{"id": 49211, "token": "\u0120Tenth", "merges": "\u0120T enth", "count": 95, "type": "remove by frequency"} -{"id": 22891, "token": "\u0120Legislature", "merges": "\u0120Legisl ature", "count": 96, "type": "remove by frequency"} -{"id": 44943, "token": "\u0120Naturally", "merges": "\u0120N aturally", "count": 96, "type": "remove by frequency"} -{"id": 28776, "token": "\u0120Notably", "merges": "\u0120Not ably", "count": 96, "type": "remove by frequency"} -{"id": 41330, "token": "fefefe", "merges": "fefe fe", "count": 96, "type": "remove by frequency"} -{"id": 44526, "token": "\u0120Mets", "merges": "\u0120M ets", "count": 96, "type": "remove by frequency"} -{"id": 24768, "token": "},{\\", "merges": "}, {\\", "count": 96, "type": "remove by frequency"} -{"id": 45307, "token": "\u0120H\u00c3\u00b6", "merges": "\u0120H \u00c3\u00b6", "count": 96, "type": "remove by frequency"} -{"id": 52398, "token": "\u00e6\u0143\u013d", "merges": "\u00e6\u0143 \u013d", "count": 96, "type": "remove by frequency"} -{"id": 44259, "token": "Acknowledgments", "merges": "Acknowled gments", "count": 97, "type": "remove by frequency"} -{"id": 49937, "token": "\u0120Chevrolet", "merges": "\u0120Chev rolet", "count": 97, "type": "remove by frequency"} -{"id": 49680, "token": "\u0120hadronic", "merges": "\u0120had ronic", "count": 97, "type": "remove by frequency"} -{"id": 49457, "token": "\u0120HOWEVER", "merges": "\u0120HOW EVER", "count": 97, "type": "remove by frequency"} -{"id": 37304, "token": "\u0120Calgary", "merges": "\u0120Cal gary", "count": 97, "type": "remove by frequency"} -{"id": 42499, "token": "\u0120m\u00c3\u00a5ste", "merges": "\u0120m\u00c3\u00a5 ste", "count": 97, "type": "remove by frequency"} -{"id": 17098, "token": "}{{\\", "merges": "}{ {\\", "count": 97, "type": "remove by frequency"} -{"id": 46149, "token": "\u0120Hsp", "merges": "\u0120H sp", "count": 97, "type": "remove by frequency"} -{"id": 46075, "token": "\u0120idiopathic", "merges": "\u0120idi opathic", "count": 98, "type": "remove by frequency"} -{"id": 45585, "token": "\u0120linebacker", "merges": "\u0120lineback er", "count": 98, "type": "remove by frequency"} -{"id": 49702, "token": "\u0120proteasome", "merges": "\u0120prote asome", "count": 98, "type": "remove by frequency"} -{"id": 49402, "token": "\u0120Athletics", "merges": "\u0120Athlet ics", "count": 98, "type": "remove by frequency"} -{"id": 45575, "token": "\u0120Romanian", "merges": "\u0120Roman ian", "count": 98, "type": "remove by frequency"} -{"id": 43828, "token": "\u0120Myanmar", "merges": "\u0120My anmar", "count": 98, "type": "remove by frequency"} -{"id": 46019, "token": "Argued", "merges": "Arg ued", "count": 98, "type": "remove by frequency"} -{"id": 36317, "token": "\u0120FIGS", "merges": "\u0120FIG S", "count": 98, "type": "remove by frequency"} -{"id": 45496, "token": "\u0120(!(", "merges": "\u0120(! (", "count": 98, "type": "remove by frequency"} -{"id": 38217, "token": "(@\"", "merges": "( @\"", "count": 98, "type": "remove by frequency"} -{"id": 34488, "token": ").$", "merges": "). $", "count": 98, "type": "remove by frequency"} -{"id": 38409, "token": "leftrightarrow", "merges": "left rightarrow", "count": 99, "type": "remove by frequency"} -{"id": 46409, "token": "\u0120histologic", "merges": "\u0120hist ologic", "count": 99, "type": "remove by frequency"} -{"id": 38173, "token": "ClCompile", "merges": "Cl Compile", "count": 99, "type": "remove by frequency"} -{"id": 46936, "token": "\u0120Kazakh", "merges": "\u0120Kaz akh", "count": 99, "type": "remove by frequency"} -{"id": 44660, "token": "\u0120Ravens", "merges": "\u0120Rav ens", "count": 99, "type": "remove by frequency"} -{"id": 38327, "token": "))/(-", "merges": ")) /(-", "count": 99, "type": "remove by frequency"} -{"id": 43790, "token": ")\\,\\", "merges": ")\\ ,\\", "count": 99, "type": "remove by frequency"} -{"id": 50304, "token": "\u00e2\u0123\u00bf", "merges": "\u00e2\u0123 \u00bf", "count": 99, "type": "remove by frequency"} -{"id": 50322, "token": "\u00e2\u012a\u00a5", "merges": "\u00e2\u012a \u00a5", "count": 99, "type": "remove by frequency"} -{"id": 46608, "token": "*~(", "merges": "*~ (", "count": 99, "type": "remove by frequency"} -{"id": 50394, "token": "\u00e3\u0122\u0135", "merges": "\u00e3\u0122 \u0135", "count": 99, "type": "remove by frequency"} -{"id": 43077, "token": "\u00e1\u00bf\u0138", "merges": "\u00e1\u00bf \u0138", "count": 99, "type": "remove by frequency"} -{"id": 20475, "token": "\u00c2\u013f", "merges": "\u00c2 \u013f", "count": 99, "type": "remove by frequency"} -{"id": 23879, "token": "\u0120transfection", "merges": "\u0120trans fection", "count": 100, "type": "remove by frequency"} -{"id": 39966, "token": "\u0120\u00c3\u00b6ver", "merges": "\u0120\u00c3\u00b6 ver", "count": 100, "type": "remove by frequency"} -{"id": 48660, "token": "\u0120Reds", "merges": "\u0120Red s", "count": 100, "type": "remove by frequency"} -{"id": 35969, "token": "\u0120L\u00c3\u00a9", "merges": "\u0120L \u00c3\u00a9", "count": 100, "type": "remove by frequency"} -{"id": 53171, "token": "\u00e7\u00a8\u013e", "merges": "\u00e7\u00a8 \u013e", "count": 100, "type": "remove by frequency"} -{"id": 49377, "token": "\u00c4\u0123m", "merges": "\u00c4\u0123 m", "count": 100, "type": "remove by frequency"} -{"id": 36242, "token": "\u0120Approximately", "merges": "\u0120Appro ximately", "count": 101, "type": "remove by frequency"} -{"id": 38623, "token": "\u0120CONTRIBUTORS", "merges": "\u0120CONTRIBUT ORS", "count": 101, "type": "remove by frequency"} -{"id": 49238, "token": "\u0120anesthetized", "merges": "\u0120anest hetized", "count": 101, "type": "remove by frequency"} -{"id": 48633, "token": "\u0120microtubule", "merges": "\u0120microtub ule", "count": 101, "type": "remove by frequency"} -{"id": 48844, "token": "\u0120photoresist", "merges": "\u0120phot oresist", "count": 101, "type": "remove by frequency"} -{"id": 43393, "token": "\u0120neuropath", "merges": "\u0120neurop ath", "count": 101, "type": "remove by frequency"} -{"id": 37384, "token": "\u0120\u00cf\u0125\u00cf\u0126\u00ce\u00b7", "merges": "\u0120\u00cf\u0125 \u00cf\u0126\u00ce\u00b7", "count": 101, "type": "remove by frequency"} -{"id": 25135, "token": "}^{+", "merges": "}^{ +", "count": 101, "type": "remove by frequency"} -{"id": 12410, "token": "\u00c2\u0123", "merges": "\u00c2 \u0123", "count": 101, "type": "remove by frequency"} -{"id": 27608, "token": "\u0120Venezuela", "merges": "\u0120Venezuel a", "count": 102, "type": "remove by frequency"} -{"id": 27794, "token": "\u0120Victorian", "merges": "\u0120Victor ian", "count": 102, "type": "remove by frequency"} -{"id": 39519, "token": "\u0120ubiquitin", "merges": "\u0120ubiquit in", "count": 102, "type": "remove by frequency"} -{"id": 22565, "token": "\u0120Sheriff", "merges": "\u0120Sher iff", "count": 102, "type": "remove by frequency"} -{"id": 48907, "token": "\u0120Allies", "merges": "\u0120All ies", "count": 102, "type": "remove by frequency"} -{"id": 44489, "token": "\u0120Naples", "merges": "\u0120Na ples", "count": 102, "type": "remove by frequency"} -{"id": 45602, "token": "\u0120MRSA", "merges": "\u0120MR SA", "count": 102, "type": "remove by frequency"} -{"id": 27889, "token": "pbio", "merges": "pb io", "count": 102, "type": "remove by frequency"} -{"id": 44177, "token": "\u00c3\u00bdm", "merges": "\u00c3\u00bd m", "count": 102, "type": "remove by frequency"} -{"id": 16525, "token": "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------", "merges": "-------------------------------------------------------------------------------------------------------------------------------- --------------------------------------------------------------------------------------------------------------------------------", "count": 103, "type": "remove by frequency"} -{"id": 41878, "token": "\u0120\u00ce\u00b4\u00ce\u00b9\u00ce\u00b1", "merges": "\u0120\u00ce\u00b4 \u00ce\u00b9\u00ce\u00b1", "count": 103, "type": "remove by frequency"} -{"id": 41068, "token": "\u0120antitumor", "merges": "\u0120antit umor", "count": 104, "type": "remove by frequency"} -{"id": 50178, "token": "\u0120Lindsey", "merges": "\u0120Lind sey", "count": 104, "type": "remove by frequency"} -{"id": 31351, "token": "\u0120Belgian", "merges": "\u0120Bel gian", "count": 104, "type": "remove by frequency"} -{"id": 40839, "token": "\u0120Chiefs", "merges": "\u0120Chief s", "count": 104, "type": "remove by frequency"} -{"id": 48911, "token": "\u0120\u00ce\u00bc\u00ce\u0143", "merges": "\u0120\u00ce\u00bc \u00ce\u0143", "count": 104, "type": "remove by frequency"} -{"id": 16265, "token": "}}}(", "merges": "}} }(", "count": 104, "type": "remove by frequency"} -{"id": 32998, "token": "})_{", "merges": "}) _{", "count": 104, "type": "remove by frequency"} -{"id": 29313, "token": "\u0120Palestine", "merges": "\u0120Palest ine", "count": 105, "type": "remove by frequency"} -{"id": 48628, "token": "\u0120doxor", "merges": "\u0120do xor", "count": 105, "type": "remove by frequency"} -{"id": 21627, "token": "}\\,\\", "merges": "}\\ ,\\", "count": 105, "type": "remove by frequency"} -{"id": 47898, "token": "\u0120h\u00c3\u00a5", "merges": "\u0120h \u00c3\u00a5", "count": 105, "type": "remove by frequency"} -{"id": 49740, "token": "\u0120Utt", "merges": "\u0120U tt", "count": 105, "type": "remove by frequency"} -{"id": 46186, "token": ":{\\", "merges": ": {\\", "count": 105, "type": "remove by frequency"} -{"id": 35622, "token": "\u0120astrocytes", "merges": "\u0120ast rocytes", "count": 106, "type": "remove by frequency"} -{"id": 10823, "token": "\u0120Democrats", "merges": "\u0120Democr ats", "count": 106, "type": "remove by frequency"} -{"id": 30142, "token": "\u0120comorbid", "merges": "\u0120comor bid", "count": 106, "type": "remove by frequency"} -{"id": 47823, "token": "\u0120INDIRECT", "merges": "\u0120IN DIRECT", "count": 106, "type": "remove by frequency"} -{"id": 38159, "token": "apoptotic", "merges": "apopt otic", "count": 106, "type": "remove by frequency"} -{"id": 20176, "token": "\u0120Nebraska", "merges": "\u0120Ne braska", "count": 106, "type": "remove by frequency"} -{"id": 39863, "token": "\u0120Eston", "merges": "\u0120E ston", "count": 106, "type": "remove by frequency"} -{"id": 48744, "token": ")\\<", "merges": ")\\ <", "count": 106, "type": "remove by frequency"} -{"id": 48977, "token": "\u0120erythrocytes", "merges": "\u0120eryth rocytes", "count": 107, "type": "remove by frequency"} -{"id": 31737, "token": "\u0120transgender", "merges": "\u0120trans gender", "count": 107, "type": "remove by frequency"} -{"id": 29089, "token": "\u0120Cincinnati", "merges": "\u0120Cinc innati", "count": 107, "type": "remove by frequency"} -{"id": 47804, "token": "\u0120quelques", "merges": "\u0120quel ques", "count": 107, "type": "remove by frequency"} -{"id": 48104, "token": "\u0120Lisbon", "merges": "\u0120Lis bon", "count": 107, "type": "remove by frequency"} -{"id": 24918, "token": "\u0120$('#", "merges": "\u0120$(' #", "count": 107, "type": "remove by frequency"} -{"id": 41447, "token": "[\\#", "merges": "[\\ #", "count": 107, "type": "remove by frequency"} -{"id": 47625, "token": "\u0120subcellular", "merges": "\u0120sub cellular", "count": 108, "type": "remove by frequency"} -{"id": 43005, "token": "\u00ce\u00bf\u00cf\u0127\u00ce\u00bc\u00ce\u00b5", "merges": "\u00ce\u00bf\u00cf\u0127 \u00ce\u00bc\u00ce\u00b5", "count": 108, "type": "remove by frequency"} -{"id": 36763, "token": "\u0120Ottoman", "merges": "\u0120Ott oman", "count": 108, "type": "remove by frequency"} -{"id": 49444, "token": "\u0120Lenin", "merges": "\u0120Len in", "count": 108, "type": "remove by frequency"} -{"id": 23882, "token": "|^{", "merges": "| ^{", "count": 108, "type": "remove by frequency"} -{"id": 30522, "token": "]^{", "merges": "] ^{", "count": 108, "type": "remove by frequency"} -{"id": 41254, "token": "\u0120\u00c3\u00ab", "merges": "\u0120\u00c3 \u00ab", "count": 108, "type": "remove by frequency"} -{"id": 38843, "token": "\u0120surfactant", "merges": "\u0120surfact ant", "count": 109, "type": "remove by frequency"} -{"id": 49640, "token": "\u0120pretreated", "merges": "\u0120pret reated", "count": 109, "type": "remove by frequency"} -{"id": 25794, "token": "\u0120Delaware", "merges": "\u0120Del aware", "count": 109, "type": "remove by frequency"} -{"id": 42577, "token": "\u0120hypoxic", "merges": "\u0120hypox ic", "count": 109, "type": "remove by frequency"} -{"id": 32681, "token": "\u0120\u00cf\u0126\u00cf\u012b\u00ce\u00bd", "merges": "\u0120\u00cf\u0126 \u00cf\u012b\u00ce\u00bd", "count": 109, "type": "remove by frequency"} -{"id": 39272, "token": "}}}}\\", "merges": "}} }}\\", "count": 109, "type": "remove by frequency"} -{"id": 33697, "token": "\u0120DCs", "merges": "\u0120D Cs", "count": 109, "type": "remove by frequency"} -{"id": 10952, "token": "\\}$", "merges": "\\ }$", "count": 109, "type": "remove by frequency"} -{"id": 45550, "token": "OnClickListener", "merges": "On ClickListener", "count": 110, "type": "remove by frequency"} -{"id": 9892, "token": "\u0120Appeals", "merges": "\u0120Appe als", "count": 110, "type": "remove by frequency"} -{"id": 38756, "token": "\u0120trypsin", "merges": "\u0120try psin", "count": 110, "type": "remove by frequency"} -{"id": 38752, "token": "\u0120Albany", "merges": "\u0120Alb any", "count": 110, "type": "remove by frequency"} -{"id": 16158, "token": "\u0120Saudi", "merges": "\u0120S audi", "count": 110, "type": "remove by frequency"} -{"id": 46147, "token": "\u0120Kathy", "merges": "\u0120K athy", "count": 110, "type": "remove by frequency"} -{"id": 41277, "token": "\u0120Juda", "merges": "\u0120Jud a", "count": 110, "type": "remove by frequency"} -{"id": 34145, "token": "biggr", "merges": "big gr", "count": 110, "type": "remove by frequency"} -{"id": 16160, "token": "*\\<", "merges": "* \\<", "count": 110, "type": "remove by frequency"} -{"id": 28511, "token": ";\\;", "merges": ";\\ ;", "count": 110, "type": "remove by frequency"} -{"id": 37447, "token": "\u00db\u0137", "merges": "\u00db \u0137", "count": 110, "type": "remove by frequency"} -{"id": 40099, "token": "\u0120proliferative", "merges": "\u0120prolifer ative", "count": 111, "type": "remove by frequency"} -{"id": 47486, "token": "\u0120Volkswagen", "merges": "\u0120Volks wagen", "count": 111, "type": "remove by frequency"} -{"id": 38717, "token": "\u0120mitotic", "merges": "\u0120mit otic", "count": 111, "type": "remove by frequency"} -{"id": 30841, "token": "\u0120Congressional", "merges": "\u0120Cong ressional", "count": 112, "type": "remove by frequency"} -{"id": 22283, "token": "\u0120Venezuel", "merges": "\u0120V enezuel", "count": 112, "type": "remove by frequency"} -{"id": 48641, "token": "\u0120Marxist", "merges": "\u0120Marx ist", "count": 112, "type": "remove by frequency"} -{"id": 28746, "token": "\u0120Lebanon", "merges": "\u0120Leban on", "count": 113, "type": "remove by frequency"} -{"id": 49456, "token": "\u0120\u00ce\u00b4\u00ce\u00b5\u00ce\u00bd", "merges": "\u0120\u00ce\u00b4 \u00ce\u00b5\u00ce\u00bd", "count": 113, "type": "remove by frequency"} -{"id": 29539, "token": "\u0120Ottawa", "merges": "\u0120Ot tawa", "count": 113, "type": "remove by frequency"} -{"id": 48002, "token": "\u0120\u00cf\u0126\u00ce\u00b9\u00cf\u0124", "merges": "\u0120\u00cf\u0126 \u00ce\u00b9\u00cf\u0124", "count": 113, "type": "remove by frequency"} -{"id": 32345, "token": "\u0120Niger", "merges": "\u0120N iger", "count": 113, "type": "remove by frequency"} -{"id": 38382, "token": "\u0120Bris", "merges": "\u0120Br is", "count": 113, "type": "remove by frequency"} -{"id": 28280, "token": "\u0120Morm", "merges": "\u0120M orm", "count": 113, "type": "remove by frequency"} -{"id": 27805, "token": "lVert", "merges": "l Vert", "count": 113, "type": "remove by frequency"} -{"id": 42052, "token": "\u0120\u00c2\u00bb.", "merges": "\u0120\u00c2\u00bb .", "count": 113, "type": "remove by frequency"} -{"id": 46673, "token": "\u00e1\u00bd\u00b0", "merges": "\u00e1\u00bd \u00b0", "count": 113, "type": "remove by frequency"} -{"id": 54216, "token": "\u00e9\u0129\u00a6", "merges": "\u00e9\u0129 \u00a6", "count": 113, "type": "remove by frequency"} -{"id": 46559, "token": "\u0120Brighton", "merges": "\u0120Bright on", "count": 114, "type": "remove by frequency"} -{"id": 22306, "token": "\u0120Barack", "merges": "\u0120Bar ack", "count": 114, "type": "remove by frequency"} -{"id": 33074, "token": "\u0120Cuban", "merges": "\u0120Cub an", "count": 114, "type": "remove by frequency"} -{"id": 44558, "token": "\u0120Malay", "merges": "\u0120Mal ay", "count": 114, "type": "remove by frequency"} -{"id": 46890, "token": ")}}{", "merges": ") }}{", "count": 114, "type": "remove by frequency"} -{"id": 43869, "token": "\u0120\u00c5\u0142", "merges": "\u0120\u00c5 \u0142", "count": 114, "type": "remove by frequency"} -{"id": 41491, "token": "\u0120Worcester", "merges": "\u0120Wor cester", "count": 115, "type": "remove by frequency"} -{"id": 47374, "token": "\u0120Lakers", "merges": "\u0120L akers", "count": 115, "type": "remove by frequency"} -{"id": 45248, "token": "\u0120Manit", "merges": "\u0120Man it", "count": 115, "type": "remove by frequency"} -{"id": 47979, "token": "\u0120Abbas", "merges": "\u0120Ab bas", "count": 115, "type": "remove by frequency"} -{"id": 32710, "token": "\u0120Modi", "merges": "\u0120Mod i", "count": 115, "type": "remove by frequency"} -{"id": 29310, "token": "\u0120Bangladesh", "merges": "\u0120Bangl adesh", "count": 116, "type": "remove by frequency"} -{"id": 30186, "token": "\u0120Pentagon", "merges": "\u0120Pent agon", "count": 116, "type": "remove by frequency"} -{"id": 44867, "token": "\u0120Belfast", "merges": "\u0120Belf ast", "count": 116, "type": "remove by frequency"} -{"id": 32109, "token": "\u0120lactate", "merges": "\u0120lact ate", "count": 116, "type": "remove by frequency"} -{"id": 32111, "token": "\u0120Mumbai", "merges": "\u0120M umbai", "count": 116, "type": "remove by frequency"} -{"id": 24460, "token": "\u0120Romney", "merges": "\u0120Rom ney", "count": 116, "type": "remove by frequency"} -{"id": 26874, "token": "lbrack", "merges": "l brack", "count": 116, "type": "remove by frequency"} -{"id": 40301, "token": "\u00ce\u00af\u00ce\u00b1\u00cf\u0124", "merges": "\u00ce\u00af\u00ce\u00b1 \u00cf\u0124", "count": 116, "type": "remove by frequency"} -{"id": 47478, "token": "\u0120TBI", "merges": "\u0120T BI", "count": 116, "type": "remove by frequency"} -{"id": 37890, "token": "\u00c3\u00a2nd", "merges": "\u00c3\u00a2 nd", "count": 116, "type": "remove by frequency"} -{"id": 50384, "token": "\u00e2\u013b\u00ac", "merges": "\u00e2\u013b \u00ac", "count": 116, "type": "remove by frequency"} -{"id": 26260, "token": "\u0120--------------------------------------------------------------------------------------------------------------------------------", "merges": "\u0120 --------------------------------------------------------------------------------------------------------------------------------", "count": 117, "type": "remove by frequency"} -{"id": 38521, "token": "\u0120lineback", "merges": "\u0120line back", "count": 117, "type": "remove by frequency"} -{"id": 30489, "token": "lfloor", "merges": "l floor", "count": 117, "type": "remove by frequency"} -{"id": 47752, "token": "\u0120Bronx", "merges": "\u0120Bron x", "count": 117, "type": "remove by frequency"} -{"id": 45690, "token": "\u0120Scots", "merges": "\u0120Sc ots", "count": 117, "type": "remove by frequency"} -{"id": 29135, "token": "\u0120\u00e1\u00bc\u0132", "merges": "\u0120\u00e1\u00bc \u0132", "count": 117, "type": "remove by frequency"} -{"id": 43026, "token": "\u0120QTL", "merges": "\u0120Q TL", "count": 117, "type": "remove by frequency"} -{"id": 48234, "token": "\u00e1\u00bf\u00a6", "merges": "\u00e1\u00bf \u00a6", "count": 117, "type": "remove by frequency"} -{"id": 26808, "token": "nanomaterials", "merges": "nan omaterials", "count": 118, "type": "remove by frequency"} -{"id": 27756, "token": "\u0120Idaho", "merges": "\u0120Id aho", "count": 118, "type": "remove by frequency"} -{"id": 42811, "token": "\u0120Pero", "merges": "\u0120P ero", "count": 118, "type": "remove by frequency"} -{"id": 12460, "token": "}}$,", "merges": "}} $,", "count": 118, "type": "remove by frequency"} -{"id": 46363, "token": "\u0120HCT", "merges": "\u0120H CT", "count": 118, "type": "remove by frequency"} -{"id": 46346, "token": "\u0120histopathological", "merges": "\u0120hist opathological", "count": 119, "type": "remove by frequency"} -{"id": 43326, "token": "\u0120Hawaiian", "merges": "\u0120Hawai ian", "count": 119, "type": "remove by frequency"} -{"id": 44637, "token": "\u0120Cyprus", "merges": "\u0120Cy prus", "count": 119, "type": "remove by frequency"} -{"id": 40316, "token": "\u0120Devon", "merges": "\u0120Dev on", "count": 119, "type": "remove by frequency"} -{"id": 33984, "token": "\u0120CURI", "merges": "\u0120C URI", "count": 119, "type": "remove by frequency"} -{"id": 31878, "token": "\u0120Hait", "merges": "\u0120H ait", "count": 119, "type": "remove by frequency"} -{"id": 44123, "token": "\u0120Liam", "merges": "\u0120Li am", "count": 119, "type": "remove by frequency"} -{"id": 33509, "token": "INCLUDING", "merges": "IN CLUDING", "count": 120, "type": "remove by frequency"} -{"id": 29535, "token": "\u0120Austrian", "merges": "\u0120Aust rian", "count": 120, "type": "remove by frequency"} -{"id": 47627, "token": "\u0120Deborah", "merges": "\u0120Debor ah", "count": 120, "type": "remove by frequency"} -{"id": 36819, "token": "\u0120Tory", "merges": "\u0120T ory", "count": 120, "type": "remove by frequency"} -{"id": 49220, "token": "\u0120MgCl", "merges": "\u0120Mg Cl", "count": 120, "type": "remove by frequency"} -{"id": 44797, "token": "\u0120ktor", "merges": "\u0120k tor", "count": 120, "type": "remove by frequency"} -{"id": 14658, "token": "\u0120\u00e2\u013b\u00aa", "merges": "\u0120\u00e2 \u013b\u00aa", "count": 120, "type": "remove by frequency"} -{"id": 17849, "token": "\u0120s\u00c4\u0125", "merges": "\u0120s \u00c4\u0125", "count": 120, "type": "remove by frequency"} -{"id": 20863, "token": "\u0120MERCHANTABILITY", "merges": "\u0120MER CHANTABILITY", "count": 121, "type": "remove by frequency"} -{"id": 43164, "token": "\u0120fprintf", "merges": "\u0120f printf", "count": 121, "type": "remove by frequency"} -{"id": 30266, "token": "rfloor", "merges": "r floor", "count": 121, "type": "remove by frequency"} -{"id": 47403, "token": "\u00cf\u0122\u00ce\u00b5\u00ce\u00b9", "merges": "\u00cf\u0122 \u00ce\u00b5\u00ce\u00b9", "count": 121, "type": "remove by frequency"} -{"id": 45751, "token": "\u0120Photograph", "merges": "\u0120Phot ograph", "count": 122, "type": "remove by frequency"} -{"id": 27485, "token": "\u0120Ukrainian", "merges": "\u0120Uk rainian", "count": 122, "type": "remove by frequency"} -{"id": 30167, "token": "\u0120Brussels", "merges": "\u0120Br ussels", "count": 122, "type": "remove by frequency"} -{"id": 40115, "token": "\u0120Lithuan", "merges": "\u0120Lith uan", "count": 122, "type": "remove by frequency"} -{"id": 49786, "token": "\u0120Minist", "merges": "\u0120Min ist", "count": 122, "type": "remove by frequency"} -{"id": 30228, "token": "\u0120Croat", "merges": "\u0120Cro at", "count": 122, "type": "remove by frequency"} -{"id": 35280, "token": "\u00ce\u00bf\u00cf\u0127\u00ce\u00bd", "merges": "\u00ce\u00bf\u00cf\u0127 \u00ce\u00bd", "count": 122, "type": "remove by frequency"} -{"id": 36587, "token": "=\"../../../", "merges": "=\"../../ ../", "count": 123, "type": "remove by frequency"} -{"id": 48684, "token": "\u0120rehearing", "merges": "\u0120rehe aring", "count": 123, "type": "remove by frequency"} -{"id": 41414, "token": "\u0120Walmart", "merges": "\u0120Wal mart", "count": 123, "type": "remove by frequency"} -{"id": 27325, "token": "\u0120Sudan", "merges": "\u0120Sud an", "count": 123, "type": "remove by frequency"} -{"id": 36748, "token": "\u00c3\u00a9tait", "merges": "\u00c3\u00a9 tait", "count": 123, "type": "remove by frequency"} -{"id": 50340, "token": "\u00e2\u0136\u012c", "merges": "\u00e2\u0136 \u012c", "count": 123, "type": "remove by frequency"} -{"id": 37207, "token": "}|^", "merges": "}| ^", "count": 123, "type": "remove by frequency"} -{"id": 16111, "token": "\u0120Afghanistan", "merges": "\u0120Afghan istan", "count": 124, "type": "remove by frequency"} -{"id": 24646, "token": "\u0120Conversely", "merges": "\u0120Con versely", "count": 124, "type": "remove by frequency"} -{"id": 34402, "token": "\u0120cisplatin", "merges": "\u0120cis platin", "count": 124, "type": "remove by frequency"} -{"id": 17753, "token": "\u0120Louisiana", "merges": "\u0120Louis iana", "count": 124, "type": "remove by frequency"} -{"id": 22514, "token": "\u0120Leban", "merges": "\u0120Le ban", "count": 124, "type": "remove by frequency"} -{"id": 43406, "token": "\u0120Jared", "merges": "\u0120J ared", "count": 124, "type": "remove by frequency"} -{"id": 49076, "token": "ophils", "merges": "oph ils", "count": 124, "type": "remove by frequency"} -{"id": 22295, "token": "\u0120c\u00c4\u0125", "merges": "\u0120c \u00c4\u0125", "count": 124, "type": "remove by frequency"} -{"id": 30765, "token": "\u00c3\u00b1or", "merges": "\u00c3\u00b1 or", "count": 124, "type": "remove by frequency"} -{"id": 35311, "token": "\u0120immunohistochemistry", "merges": "\u0120immunohist ochemistry", "count": 125, "type": "remove by frequency"} -{"id": 47667, "token": "\u0120filos", "merges": "\u0120fil os", "count": 125, "type": "remove by frequency"} -{"id": 32080, "token": "\u0120\\{\\", "merges": "\u0120\\ {\\", "count": 125, "type": "remove by frequency"} -{"id": 33011, "token": "\u00cf\u012b\u00cf\u0124", "merges": "\u00cf\u012b \u00cf\u0124", "count": 125, "type": "remove by frequency"} -{"id": 44973, "token": "\u00e0\u00b0\u00bf", "merges": "\u00e0\u00b0 \u00bf", "count": 125, "type": "remove by frequency"} -{"id": 45685, "token": "\u00c4\u0123r", "merges": "\u00c4\u0123 r", "count": 125, "type": "remove by frequency"} -{"id": 49630, "token": "\u0120modulates", "merges": "\u0120modul ates", "count": 126, "type": "remove by frequency"} -{"id": 28150, "token": "\u0120Sergeant", "merges": "\u0120Ser geant", "count": 126, "type": "remove by frequency"} -{"id": 30109, "token": "\u0120Veterans", "merges": "\u0120Veter ans", "count": 126, "type": "remove by frequency"} -{"id": 45845, "token": "\u0120Tribunal", "merges": "\u0120Trib unal", "count": 126, "type": "remove by frequency"} -{"id": 48486, "token": "\u0120Himself", "merges": "\u0120Him self", "count": 126, "type": "remove by frequency"} -{"id": 47665, "token": "\u0120alanine", "merges": "\u0120al anine", "count": 126, "type": "remove by frequency"} -{"id": 32553, "token": "\u0120aryl", "merges": "\u0120ar yl", "count": 126, "type": "remove by frequency"} -{"id": 43391, "token": "})}{", "merges": "}) }{", "count": 126, "type": "remove by frequency"} -{"id": 45339, "token": "\u0120Taq", "merges": "\u0120Ta q", "count": 126, "type": "remove by frequency"} -{"id": 24254, "token": "\u0120]$", "merges": "\u0120] $", "count": 126, "type": "remove by frequency"} -{"id": 8699, "token": "}{-", "merges": "}{ -", "count": 126, "type": "remove by frequency"} -{"id": 42612, "token": "scriptsize", "merges": "script size", "count": 127, "type": "remove by frequency"} -{"id": 23997, "token": "vartheta", "merges": "vart heta", "count": 127, "type": "remove by frequency"} -{"id": 45354, "token": "\u0120Salem", "merges": "\u0120Sal em", "count": 127, "type": "remove by frequency"} -{"id": 48576, "token": "\u0120Marty", "merges": "\u0120Mart y", "count": 127, "type": "remove by frequency"} -{"id": 44509, "token": "\u0120Hond", "merges": "\u0120H ond", "count": 127, "type": "remove by frequency"} -{"id": 33045, "token": "\u0120kommer", "merges": "\u0120kom mer", "count": 128, "type": "remove by frequency"} -{"id": 24599, "token": "\u0120murine", "merges": "\u0120mur ine", "count": 128, "type": "remove by frequency"} -{"id": 20830, "token": "\u0120miRNAs", "merges": "\u0120mi RNAs", "count": 128, "type": "remove by frequency"} -{"id": 32333, "token": "\u0120efter", "merges": "\u0120e fter", "count": 128, "type": "remove by frequency"} -{"id": 44885, "token": "\u00c3\u00a4m\u00c3\u00a4", "merges": "\u00c3\u00a4m \u00c3\u00a4", "count": 128, "type": "remove by frequency"} -{"id": 41862, "token": "\u00e1\u00bf\u00b6", "merges": "\u00e1\u00bf \u00b6", "count": 128, "type": "remove by frequency"} -{"id": 29152, "token": "${\\", "merges": "$ {\\", "count": 128, "type": "remove by frequency"} -{"id": 43783, "token": "\u0120Philippine", "merges": "\u0120Philipp ine", "count": 129, "type": "remove by frequency"} -{"id": 45932, "token": "\u0120arginine", "merges": "\u0120arg inine", "count": 129, "type": "remove by frequency"} -{"id": 38284, "token": "\u0120assayed", "merges": "\u0120assay ed", "count": 129, "type": "remove by frequency"} -{"id": 48477, "token": "\u0120Baltic", "merges": "\u0120Balt ic", "count": 129, "type": "remove by frequency"} -{"id": 22404, "token": "\u0120Putin", "merges": "\u0120Put in", "count": 129, "type": "remove by frequency"} -{"id": 29414, "token": "\u0120Jos\u00c3\u00a9", "merges": "\u0120Jos \u00c3\u00a9", "count": 129, "type": "remove by frequency"} -{"id": 47946, "token": "\u00cf\u0128\u00ce\u00bf", "merges": "\u00cf\u0128 \u00ce\u00bf", "count": 129, "type": "remove by frequency"} -{"id": 54597, "token": "\u00ef\u00bd\u00a4", "merges": "\u00ef\u00bd \u00a4", "count": 129, "type": "remove by frequency"} -{"id": 33975, "token": "\u0120Ultimately", "merges": "\u0120Ult imately", "count": 130, "type": "remove by frequency"} -{"id": 23294, "token": "\u0120Alzheimer", "merges": "\u0120Al zheimer", "count": 130, "type": "remove by frequency"} -{"id": 41460, "token": "\u0120Istanbul", "merges": "\u0120I stanbul", "count": 130, "type": "remove by frequency"} -{"id": 33072, "token": "\u0120Exhibit", "merges": "\u0120Ex hibit", "count": 130, "type": "remove by frequency"} -{"id": 34913, "token": "\u0120Marines", "merges": "\u0120Mar ines", "count": 130, "type": "remove by frequency"} -{"id": 37790, "token": "\u0120Rugby", "merges": "\u0120Rug by", "count": 130, "type": "remove by frequency"} -{"id": 48335, "token": "\u0120avons", "merges": "\u0120av ons", "count": 130, "type": "remove by frequency"} -{"id": 27650, "token": "\u0120\u00c5\u00bee", "merges": "\u0120\u00c5\u00be e", "count": 130, "type": "remove by frequency"} -{"id": 43913, "token": "eqno", "merges": "eq no", "count": 130, "type": "remove by frequency"} -{"id": 47200, "token": "\u0120oligonucleotides", "merges": "\u0120oligonucle otides", "count": 131, "type": "remove by frequency"} -{"id": 41749, "token": "\u0120Supplemental", "merges": "\u0120Supp lemental", "count": 131, "type": "remove by frequency"} -{"id": 42669, "token": "\u0120Springfield", "merges": "\u0120Spring field", "count": 131, "type": "remove by frequency"} -{"id": 37772, "token": "\u0120moiety", "merges": "\u0120moi ety", "count": 131, "type": "remove by frequency"} -{"id": 29161, "token": "\u0120Stalin", "merges": "\u0120St alin", "count": 131, "type": "remove by frequency"} -{"id": 46625, "token": "\u0120Debor", "merges": "\u0120De bor", "count": 131, "type": "remove by frequency"} -{"id": 12159, "token": "]{}\\", "merges": "]{ }\\", "count": 131, "type": "remove by frequency"} -{"id": 28555, "token": "\u00e1\u00bd\u00b6", "merges": "\u00e1\u00bd \u00b6", "count": 131, "type": "remove by frequency"} -{"id": 42916, "token": "\u00e0\u00b1\u012f", "merges": "\u00e0\u00b1 \u012f", "count": 131, "type": "remove by frequency"} -{"id": 24625, "token": "\u00c2\u0141", "merges": "\u00c2 \u0141", "count": 131, "type": "remove by frequency"} -{"id": 40867, "token": "\u0120ribosomal", "merges": "\u0120rib osomal", "count": 132, "type": "remove by frequency"} -{"id": 50112, "token": "\u0120Gonzales", "merges": "\u0120Gonz ales", "count": 132, "type": "remove by frequency"} -{"id": 49632, "token": "\u0120formalin", "merges": "\u0120formal in", "count": 132, "type": "remove by frequency"} -{"id": 38027, "token": "\u0120Prosec", "merges": "\u0120Pro sec", "count": 132, "type": "remove by frequency"} -{"id": 45867, "token": "\u0120Megan", "merges": "\u0120Meg an", "count": 132, "type": "remove by frequency"} -{"id": 39924, "token": "\u0120Tanz", "merges": "\u0120T anz", "count": 132, "type": "remove by frequency"} -{"id": 50203, "token": "\u0120Cys", "merges": "\u0120C ys", "count": 132, "type": "remove by frequency"} -{"id": 4700, "token": ".$$", "merges": ". $$", "count": 132, "type": "remove by frequency"} -{"id": 42479, "token": "\u00e1\u00bd\u00b8", "merges": "\u00e1\u00bd \u00b8", "count": 132, "type": "remove by frequency"} -{"id": 32483, "token": "\u0120Increased", "merges": "\u0120Incre ased", "count": 133, "type": "remove by frequency"} -{"id": 49048, "token": "\u0120Addiction", "merges": "\u0120Add iction", "count": 133, "type": "remove by frequency"} -{"id": 20736, "token": "\u0120Democrat", "merges": "\u0120Democr at", "count": 133, "type": "remove by frequency"} -{"id": 32533, "token": "\u0120Ninth", "merges": "\u0120N inth", "count": 133, "type": "remove by frequency"} -{"id": 41304, "token": "\u0120\u00c3\u00a5r", "merges": "\u0120\u00c3\u00a5 r", "count": 133, "type": "remove by frequency"} -{"id": 44687, "token": "\u00cf\u0122\u00ce\u00b5", "merges": "\u00cf\u0122 \u00ce\u00b5", "count": 133, "type": "remove by frequency"} -{"id": 14101, "token": ",\\,", "merges": ",\\ ,", "count": 133, "type": "remove by frequency"} -{"id": 34763, "token": "\u0120myeloid", "merges": "\u0120myel oid", "count": 134, "type": "remove by frequency"} -{"id": 26315, "token": "\u0120Bangl", "merges": "\u0120Bang l", "count": 134, "type": "remove by frequency"} -{"id": 42135, "token": "\u0120Vince", "merges": "\u0120V ince", "count": 134, "type": "remove by frequency"} -{"id": 49008, "token": "\u0120Palin", "merges": "\u0120Pal in", "count": 134, "type": "remove by frequency"} -{"id": 25614, "token": "\u0120avait", "merges": "\u0120av ait", "count": 134, "type": "remove by frequency"} -{"id": 40171, "token": "\u0120\u00e1\u00bc\u0122", "merges": "\u0120\u00e1\u00bc \u0122", "count": 134, "type": "remove by frequency"} -{"id": 24142, "token": "\u0120\u00c2\u00bd", "merges": "\u0120\u00c2 \u00bd", "count": 134, "type": "remove by frequency"} -{"id": 26723, "token": "^+$", "merges": "^+ $", "count": 134, "type": "remove by frequency"} -{"id": 52472, "token": "\u00e6\u00b1\u0130", "merges": "\u00e6\u00b1 \u0130", "count": 134, "type": "remove by frequency"} -{"id": 50286, "token": "\u00cb\u012f", "merges": "\u00cb \u012f", "count": 134, "type": "remove by frequency"} -{"id": 20973, "token": "\u0120Connecticut", "merges": "\u0120Conne cticut", "count": 135, "type": "remove by frequency"} -{"id": 27330, "token": "\u0120cytotoxic", "merges": "\u0120cytotox ic", "count": 135, "type": "remove by frequency"} -{"id": 35843, "token": "\u0120Battalion", "merges": "\u0120Batt alion", "count": 135, "type": "remove by frequency"} -{"id": 32216, "token": "\u0120Secondly", "merges": "\u0120Second ly", "count": 135, "type": "remove by frequency"} -{"id": 47475, "token": "\u0120hydrogel", "merges": "\u0120hydro gel", "count": 135, "type": "remove by frequency"} -{"id": 49185, "token": "\u0120Arabian", "merges": "\u0120Arab ian", "count": 135, "type": "remove by frequency"} -{"id": 41994, "token": "\u0120Raiders", "merges": "\u0120Ra iders", "count": 135, "type": "remove by frequency"} -{"id": 50226, "token": "\u0120andra", "merges": "\u0120and ra", "count": 135, "type": "remove by frequency"} -{"id": 48622, "token": "Kilos", "merges": "K ilos", "count": 135, "type": "remove by frequency"} -{"id": 43994, "token": "\u00ce\u00bb\u00ce\u00b1", "merges": "\u00ce\u00bb \u00ce\u00b1", "count": 135, "type": "remove by frequency"} -{"id": 26568, "token": "_{-\\", "merges": "_{ -\\", "count": 135, "type": "remove by frequency"} -{"id": 48586, "token": "\u0120Nottingham", "merges": "\u0120Not tingham", "count": 136, "type": "remove by frequency"} -{"id": 43113, "token": "\u0120Indonesian", "merges": "\u0120Indones ian", "count": 136, "type": "remove by frequency"} -{"id": 32665, "token": "\u0120Judicial", "merges": "\u0120Jud icial", "count": 136, "type": "remove by frequency"} -{"id": 21461, "token": "\u0120Clearly", "merges": "\u0120Cle arly", "count": 136, "type": "remove by frequency"} -{"id": 38442, "token": "\u0120hade", "merges": "\u0120had e", "count": 136, "type": "remove by frequency"} -{"id": 46858, "token": "biamo", "merges": "bi amo", "count": 136, "type": "remove by frequency"} -{"id": 35613, "token": "\u0120/*!", "merges": "\u0120/* !", "count": 136, "type": "remove by frequency"} -{"id": 39586, "token": "}{~", "merges": "}{ ~", "count": 136, "type": "remove by frequency"} -{"id": 34996, "token": "\u0120\u00e1\u00bd", "merges": "\u0120 \u00e1\u00bd", "count": 136, "type": "remove by frequency"} -{"id": 52354, "token": "\u00e6\u00a7\u0135", "merges": "\u00e6\u00a7 \u0135", "count": 136, "type": "remove by frequency"} -{"id": 9589, "token": "\u00c2\u0122", "merges": "\u00c2 \u0122", "count": 136, "type": "remove by frequency"} -{"id": 46383, "token": "\u0120oligonucleotide", "merges": "\u0120oligonucle otide", "count": 137, "type": "remove by frequency"} -{"id": 40358, "token": "\u0120Bonferroni", "merges": "\u0120Bon ferroni", "count": 137, "type": "remove by frequency"} -{"id": 46058, "token": "\u0120econ\u00c3\u00b3", "merges": "\u0120econ \u00c3\u00b3", "count": 137, "type": "remove by frequency"} -{"id": 31904, "token": "\u0120\\*\\*\\*", "merges": "\u0120\\* \\*\\*", "count": 137, "type": "remove by frequency"} -{"id": 33649, "token": "\u0120PTSD", "merges": "\u0120PT SD", "count": 137, "type": "remove by frequency"} -{"id": 9722, "token": "\u0120$(\\", "merges": "\u0120$ (\\", "count": 137, "type": "remove by frequency"} -{"id": 50664, "token": "\u00e5\u0125\u012f", "merges": "\u00e5\u0125 \u012f", "count": 137, "type": "remove by frequency"} -{"id": 50324, "token": "\u00e2\u012a\u00ae", "merges": "\u00e2\u012a \u00ae", "count": 137, "type": "remove by frequency"} -{"id": 47502, "token": "\u0120pharmacokinetic", "merges": "\u0120pharmac okinetic", "count": 138, "type": "remove by frequency"} -{"id": 49251, "token": "\u0120Rehabilitation", "merges": "\u0120Re habilitation", "count": 138, "type": "remove by frequency"} -{"id": 35435, "token": "\u0120constexpr", "merges": "\u0120const expr", "count": 138, "type": "remove by frequency"} -{"id": 33154, "token": "\u0120pol\u00c3\u0143tica", "merges": "\u0120pol\u00c3\u0143 tica", "count": 138, "type": "remove by frequency"} -{"id": 25187, "token": "Defendant", "merges": "Def endant", "count": 138, "type": "remove by frequency"} -{"id": 17360, "token": "\u0120Oklahoma", "merges": "\u0120O klahoma", "count": 138, "type": "remove by frequency"} -{"id": 37976, "token": "\u0120Theresa", "merges": "\u0120The resa", "count": 138, "type": "remove by frequency"} -{"id": 33199, "token": "\u0120Memphis", "merges": "\u0120Mem phis", "count": 138, "type": "remove by frequency"} -{"id": 43265, "token": "jsfiddle", "merges": "js fiddle", "count": 138, "type": "remove by frequency"} -{"id": 34262, "token": "\u0120Romania", "merges": "\u0120Roman ia", "count": 138, "type": "remove by frequency"} -{"id": 41876, "token": "\u0120titers", "merges": "\u0120tit ers", "count": 138, "type": "remove by frequency"} -{"id": 45428, "token": "\u0120Booker", "merges": "\u0120Book er", "count": 138, "type": "remove by frequency"} -{"id": 30935, "token": "\u00cf\u0127\u00cf\u0126", "merges": "\u00cf\u0127 \u00cf\u0126", "count": 138, "type": "remove by frequency"} -{"id": 34957, "token": "\u0120COX", "merges": "\u0120CO X", "count": 138, "type": "remove by frequency"} -{"id": 33411, "token": "\u0120Vatican", "merges": "\u0120V atican", "count": 139, "type": "remove by frequency"} -{"id": 21219, "token": "\u0120Nazi", "merges": "\u0120Naz i", "count": 139, "type": "remove by frequency"} -{"id": 50377, "token": "\u00e2\u0139\u00a2", "merges": "\u00e2\u0139 \u00a2", "count": 139, "type": "remove by frequency"} -{"id": 25465, "token": "\u0120apoptotic", "merges": "\u0120apopt otic", "count": 140, "type": "remove by frequency"} -{"id": 45999, "token": "\u0120Sundays", "merges": "\u0120Sund ays", "count": 140, "type": "remove by frequency"} -{"id": 43137, "token": "\u0120Voor", "merges": "\u0120V oor", "count": 140, "type": "remove by frequency"} -{"id": 43766, "token": "\u00e1\u00bd\u00b2", "merges": "\u00e1\u00bd \u00b2", "count": 140, "type": "remove by frequency"} -{"id": 41426, "token": "\u0120Hampton", "merges": "\u0120Ham pton", "count": 141, "type": "remove by frequency"} -{"id": 46907, "token": "\u0120Kimber", "merges": "\u0120Kim ber", "count": 141, "type": "remove by frequency"} -{"id": 46390, "token": "\u0120Rubio", "merges": "\u0120Rub io", "count": 141, "type": "remove by frequency"} -{"id": 27078, "token": "\u0120Aunt", "merges": "\u0120A unt", "count": 141, "type": "remove by frequency"} -{"id": 29753, "token": "rvert", "merges": "r vert", "count": 141, "type": "remove by frequency"} -{"id": 36589, "token": "\u0120Yug", "merges": "\u0120Y ug", "count": 141, "type": "remove by frequency"} -{"id": 42257, "token": "\u00c4\u0123s", "merges": "\u00c4\u0123 s", "count": 141, "type": "remove by frequency"} -{"id": 23471, "token": "\u0120knockdown", "merges": "\u0120knock down", "count": 142, "type": "remove by frequency"} -{"id": 46827, "token": "\u0120Downtown", "merges": "\u0120Down town", "count": 142, "type": "remove by frequency"} -{"id": 41521, "token": "\u0120kunnen", "merges": "\u0120kunn en", "count": 142, "type": "remove by frequency"} -{"id": 44465, "token": "\u0120Deaf", "merges": "\u0120De af", "count": 142, "type": "remove by frequency"} -{"id": 36880, "token": "\u0120RNAi", "merges": "\u0120RNA i", "count": 142, "type": "remove by frequency"} -{"id": 46424, "token": "\u0120Seah", "merges": "\u0120Se ah", "count": 142, "type": "remove by frequency"} -{"id": 22479, "token": "\u0120LORD", "merges": "\u0120L ORD", "count": 142, "type": "remove by frequency"} -{"id": 47715, "token": "|}\\", "merges": "| }\\", "count": 142, "type": "remove by frequency"} -{"id": 41137, "token": "*~,", "merges": "*~ ,", "count": 142, "type": "remove by frequency"} -{"id": 6250, "token": "\u00e2\u0122\u012c", "merges": "\u00e2\u0122 \u012c", "count": 142, "type": "remove by frequency"} -{"id": 36472, "token": "\u0120hepatocytes", "merges": "\u0120hepat ocytes", "count": 143, "type": "remove by frequency"} -{"id": 31573, "token": "\u0120Protestant", "merges": "\u0120Protest ant", "count": 143, "type": "remove by frequency"} -{"id": 38394, "token": "\u0120Homeland", "merges": "\u0120Hom eland", "count": 143, "type": "remove by frequency"} -{"id": 35099, "token": "\u0120Eleanor", "merges": "\u0120Ele anor", "count": 143, "type": "remove by frequency"} -{"id": 40158, "token": "\u0120\u00d1\u0123\u00d1\u012c", "merges": "\u0120\u00d1\u0123 \u00d1\u012c", "count": 143, "type": "remove by frequency"} -{"id": 38150, "token": "\u0120Stephanie", "merges": "\u0120Stephan ie", "count": 144, "type": "remove by frequency"} -{"id": 49094, "token": "\u0120Breit", "merges": "\u0120Bre it", "count": 144, "type": "remove by frequency"} -{"id": 33133, "token": "\u0120Mice", "merges": "\u0120M ice", "count": 144, "type": "remove by frequency"} -{"id": 46604, "token": "\u0120Erin", "merges": "\u0120Er in", "count": 144, "type": "remove by frequency"} -{"id": 48720, "token": "\u0120daar", "merges": "\u0120da ar", "count": 144, "type": "remove by frequency"} -{"id": 31016, "token": "\u0120\u00c3\u00ac", "merges": "\u0120\u00c3 \u00ac", "count": 144, "type": "remove by frequency"} -{"id": 10022, "token": "\u0120Accordingly", "merges": "\u0120According ly", "count": 145, "type": "remove by frequency"} -{"id": 48107, "token": "\u0120follicular", "merges": "\u0120follic ular", "count": 145, "type": "remove by frequency"} -{"id": 44664, "token": "\u0120surjective", "merges": "\u0120sur jective", "count": 145, "type": "remove by frequency"} -{"id": 21744, "token": "\u0120mammalian", "merges": "\u0120mamm alian", "count": 145, "type": "remove by frequency"} -{"id": 49542, "token": "\u0120Duchess", "merges": "\u0120Duc hess", "count": 145, "type": "remove by frequency"} -{"id": 41834, "token": "\u0120Hawks", "merges": "\u0120Haw ks", "count": 145, "type": "remove by frequency"} -{"id": 41654, "token": "\u0120Rosie", "merges": "\u0120Ros ie", "count": 145, "type": "remove by frequency"} -{"id": 43308, "token": "\u0120Aside", "merges": "\u0120As ide", "count": 145, "type": "remove by frequency"} -{"id": 48058, "token": "\u00e0\u00b1\u0123", "merges": "\u00e0\u00b1 \u0123", "count": 145, "type": "remove by frequency"} -{"id": 39880, "token": "\u0120homosexuality", "merges": "\u0120hom osexuality", "count": 146, "type": "remove by frequency"} -{"id": 39689, "token": "\u0120Brotherhood", "merges": "\u0120Brother hood", "count": 146, "type": "remove by frequency"} -{"id": 40874, "token": "\u0120Interstate", "merges": "\u0120Inter state", "count": 146, "type": "remove by frequency"} -{"id": 30158, "token": "\u0120microbiota", "merges": "\u0120microbi ota", "count": 146, "type": "remove by frequency"} -{"id": 28681, "token": "\u0120Whereas", "merges": "\u0120Where as", "count": 146, "type": "remove by frequency"} -{"id": 40103, "token": "\u0120Dorothy", "merges": "\u0120Dor othy", "count": 146, "type": "remove by frequency"} -{"id": 24073, "token": "\u0120\u00cf\u0126\u00ce\u00b7\u00cf\u0124", "merges": "\u0120\u00cf\u0126\u00ce\u00b7 \u00cf\u0124", "count": 146, "type": "remove by frequency"} -{"id": 37801, "token": "\u0120Armen", "merges": "\u0120Arm en", "count": 146, "type": "remove by frequency"} -{"id": 32546, "token": "\u0120interleukin", "merges": "\u0120interle ukin", "count": 147, "type": "remove by frequency"} -{"id": 43327, "token": "\u0120Methodist", "merges": "\u0120Method ist", "count": 147, "type": "remove by frequency"} -{"id": 54600, "token": "\u00ef\u00bd\u00b0", "merges": "\u00ef\u00bd \u00b0", "count": 147, "type": "remove by frequency"} -{"id": 41887, "token": "\u0120Embassy", "merges": "\u0120Emb assy", "count": 148, "type": "remove by frequency"} -{"id": 43320, "token": "\u0120porcine", "merges": "\u0120por cine", "count": 148, "type": "remove by frequency"} -{"id": 40764, "token": "\u0120Conse", "merges": "\u0120Con se", "count": 148, "type": "remove by frequency"} -{"id": 31719, "token": "\u0120RNAs", "merges": "\u0120RNA s", "count": 148, "type": "remove by frequency"} -{"id": 48990, "token": "\u0120Leah", "merges": "\u0120Le ah", "count": 148, "type": "remove by frequency"} -{"id": 50303, "token": "\u00e2\u0123\u00ba", "merges": "\u00e2\u0123 \u00ba", "count": 148, "type": "remove by frequency"} -{"id": 40497, "token": "\u0120hydrophilic", "merges": "\u0120hydroph ilic", "count": 149, "type": "remove by frequency"} -{"id": 42573, "token": "\u0120leukocyte", "merges": "\u0120leuk ocyte", "count": 149, "type": "remove by frequency"} -{"id": 48244, "token": "\u0120Somerset", "merges": "\u0120Somers et", "count": 149, "type": "remove by frequency"} -{"id": 45188, "token": "\u0120h\u00c3\u00a4r", "merges": "\u0120h \u00c3\u00a4r", "count": 149, "type": "remove by frequency"} -{"id": 48291, "token": "\u0120PGE", "merges": "\u0120P GE", "count": 149, "type": "remove by frequency"} -{"id": 21304, "token": "\u00c2\u0142\u010a", "merges": "\u00c2\u0142 \u010a", "count": 149, "type": "remove by frequency"} -{"id": 49667, "token": "\u0120atherosclerotic", "merges": "\u0120atheros clerotic", "count": 150, "type": "remove by frequency"} -{"id": 44377, "token": "\u0120glycoprotein", "merges": "\u0120glyc oprotein", "count": 150, "type": "remove by frequency"} -{"id": 46599, "token": "\u0120Chrysler", "merges": "\u0120Chrys ler", "count": 150, "type": "remove by frequency"} -{"id": 33016, "token": "\u0120Corb", "merges": "\u0120Cor b", "count": 150, "type": "remove by frequency"} -{"id": 43762, "token": "\u00d9\u0128\u00d8\u00a7", "merges": "\u00d9\u0128 \u00d8\u00a7", "count": 150, "type": "remove by frequency"} -{"id": 44586, "token": "\u00cf\u0125\u00ce\u00b9", "merges": "\u00cf\u0125 \u00ce\u00b9", "count": 150, "type": "remove by frequency"} -{"id": 44428, "token": "\u0120&$", "merges": "\u0120& $", "count": 150, "type": "remove by frequency"} -{"id": 47355, "token": "\u0120carcinogenesis", "merges": "\u0120carcin ogenesis", "count": 151, "type": "remove by frequency"} -{"id": 47154, "token": "\u0120Byzantine", "merges": "\u0120Byz antine", "count": 151, "type": "remove by frequency"} -{"id": 41726, "token": "\u0120liposomes", "merges": "\u0120lip osomes", "count": 151, "type": "remove by frequency"} -{"id": 46439, "token": "\u0120nucleon", "merges": "\u0120nucle on", "count": 151, "type": "remove by frequency"} -{"id": 40772, "token": "\u0120Strick", "merges": "\u0120St rick", "count": 151, "type": "remove by frequency"} -{"id": 38916, "token": "\u0120Willie", "merges": "\u0120Will ie", "count": 151, "type": "remove by frequency"} -{"id": 45907, "token": "\u0120Rapids", "merges": "\u0120Rap ids", "count": 151, "type": "remove by frequency"} -{"id": 40525, "token": "\u0120mtDNA", "merges": "\u0120mt DNA", "count": 151, "type": "remove by frequency"} -{"id": 48545, "token": "\u0120Cody", "merges": "\u0120C ody", "count": 151, "type": "remove by frequency"} -{"id": 41670, "token": "\u0120Jury", "merges": "\u0120J ury", "count": 151, "type": "remove by frequency"} -{"id": 45489, "token": "\u0120GSK", "merges": "\u0120G SK", "count": 151, "type": "remove by frequency"} -{"id": 37097, "token": "\u00cf\u0123\u00ce\u00b5", "merges": "\u00cf\u0123 \u00ce\u00b5", "count": 151, "type": "remove by frequency"} -{"id": 53115, "token": "\u00e7\u00a5\u0137", "merges": "\u00e7\u00a5 \u0137", "count": 151, "type": "remove by frequency"} -{"id": 44632, "token": "\u0120heterocy", "merges": "\u0120heter ocy", "count": 152, "type": "remove by frequency"} -{"id": 35743, "token": "\u0120Rhode", "merges": "\u0120Rh ode", "count": 152, "type": "remove by frequency"} -{"id": 42782, "token": "\u0120glial", "merges": "\u0120gl ial", "count": 152, "type": "remove by frequency"} -{"id": 50138, "token": "\u0120gonad", "merges": "\u0120gon ad", "count": 152, "type": "remove by frequency"} -{"id": 34749, "token": "\u0120\u00e0\u00b0", "merges": "\u0120 \u00e0\u00b0", "count": 152, "type": "remove by frequency"} -{"id": 22389, "token": "\u0120Lieutenant", "merges": "\u0120Lie utenant", "count": 153, "type": "remove by frequency"} -{"id": 33206, "token": "\u0120WHETHER", "merges": "\u0120WHE THER", "count": 153, "type": "remove by frequency"} -{"id": 37083, "token": "\u0120Heather", "merges": "\u0120He ather", "count": 153, "type": "remove by frequency"} -{"id": 47385, "token": "\u0120Elaine", "merges": "\u0120El aine", "count": 153, "type": "remove by frequency"} -{"id": 47329, "token": "\u0120Finals", "merges": "\u0120Fin als", "count": 153, "type": "remove by frequency"} -{"id": 43427, "token": "\u0120endothelium", "merges": "\u0120end othelium", "count": 154, "type": "remove by frequency"} -{"id": 41661, "token": "\u0120hypothesize", "merges": "\u0120hypothes ize", "count": 154, "type": "remove by frequency"} -{"id": 49224, "token": "\u0120Weinstein", "merges": "\u0120We instein", "count": 154, "type": "remove by frequency"} -{"id": 35459, "token": "\u0120Newcastle", "merges": "\u0120New castle", "count": 154, "type": "remove by frequency"} -{"id": 46787, "token": "\u0120striatum", "merges": "\u0120stri atum", "count": 154, "type": "remove by frequency"} -{"id": 45711, "token": "\u0120Kathleen", "merges": "\u0120Kath leen", "count": 154, "type": "remove by frequency"} -{"id": 48006, "token": "\u0120Trustees", "merges": "\u0120Trust ees", "count": 154, "type": "remove by frequency"} -{"id": 22538, "token": "\u0120Medicare", "merges": "\u0120Medic are", "count": 154, "type": "remove by frequency"} -{"id": 44669, "token": "\u0120Merkel", "merges": "\u0120Mer kel", "count": 154, "type": "remove by frequency"} -{"id": 30926, "token": "\u0120McCain", "merges": "\u0120McC ain", "count": 154, "type": "remove by frequency"} -{"id": 12728, "token": "\u0120Syria", "merges": "\u0120Sy ria", "count": 154, "type": "remove by frequency"} -{"id": 32194, "token": "\u0120assertEquals", "merges": "\u0120assert Equals", "count": 155, "type": "remove by frequency"} -{"id": 40996, "token": "\u0120Northeast", "merges": "\u0120Nort heast", "count": 155, "type": "remove by frequency"} -{"id": 48531, "token": "\u0120\u00d8\u00a7\u00d9\u0126\u00d8\u00a3", "merges": "\u0120\u00d8\u00a7\u00d9\u0126\u00d8 \u00a3", "count": 155, "type": "remove by frequency"} -{"id": 46617, "token": "\u0120efflux", "merges": "\u0120eff lux", "count": 155, "type": "remove by frequency"} -{"id": 36452, "token": "\u0120Greeks", "merges": "\u0120Gree ks", "count": 155, "type": "remove by frequency"} -{"id": 45274, "token": "\u0120Carrie", "merges": "\u0120Car rie", "count": 155, "type": "remove by frequency"} -{"id": 34861, "token": "\u0120Tamil", "merges": "\u0120Tam il", "count": 155, "type": "remove by frequency"} -{"id": 49878, "token": "\u0120Sears", "merges": "\u0120S ears", "count": 155, "type": "remove by frequency"} -{"id": 26284, "token": "}}+", "merges": "}} +", "count": 155, "type": "remove by frequency"} -{"id": 41600, "token": "\u0120angiotensin", "merges": "\u0120ang iotensin", "count": 156, "type": "remove by frequency"} -{"id": 18370, "token": "\u0120Kentucky", "merges": "\u0120Kent ucky", "count": 156, "type": "remove by frequency"} -{"id": 42748, "token": "\u0120Contrary", "merges": "\u0120Cont rary", "count": 156, "type": "remove by frequency"} -{"id": 46024, "token": "\u0120colonic", "merges": "\u0120col onic", "count": 156, "type": "remove by frequency"} -{"id": 41118, "token": "\u0120Corinth", "merges": "\u0120Cor inth", "count": 156, "type": "remove by frequency"} -{"id": 47959, "token": "\u0120Thames", "merges": "\u0120Th ames", "count": 156, "type": "remove by frequency"} -{"id": 52870, "token": "\u00e7\u0136\u00a6", "merges": "\u00e7\u0136 \u00a6", "count": 156, "type": "remove by frequency"} -{"id": 50086, "token": "\u0120Budapest", "merges": "\u0120Bud apest", "count": 157, "type": "remove by frequency"} -{"id": 41722, "token": "hetized", "merges": "het ized", "count": 157, "type": "remove by frequency"} -{"id": 41577, "token": "\u0120Prague", "merges": "\u0120Pr ague", "count": 157, "type": "remove by frequency"} -{"id": 49156, "token": "idenote", "merges": "iden ote", "count": 157, "type": "remove by frequency"} -{"id": 21575, "token": "\u0120TLR", "merges": "\u0120T LR", "count": 157, "type": "remove by frequency"} -{"id": 50308, "token": "\u00e2\u0124\u0126", "merges": "\u00e2\u0124 \u0126", "count": 157, "type": "remove by frequency"} -{"id": 1186, "token": "\u00e2\u0122\u012b", "merges": "\u00e2\u0122 \u012b", "count": 157, "type": "remove by frequency"} -{"id": 39130, "token": "\\},", "merges": "\\ },", "count": 157, "type": "remove by frequency"} -{"id": 46400, "token": "\u0120prostagland", "merges": "\u0120prost agland", "count": 158, "type": "remove by frequency"} -{"id": 45172, "token": "\u0120Menschen", "merges": "\u0120Mens chen", "count": 158, "type": "remove by frequency"} -{"id": 29133, "token": "\u0120surfact", "merges": "\u0120sur fact", "count": 158, "type": "remove by frequency"} -{"id": 20575, "token": "\u0120Colonel", "merges": "\u0120Col onel", "count": 158, "type": "remove by frequency"} -{"id": 33283, "token": "\u0120Gospel", "merges": "\u0120G ospel", "count": 158, "type": "remove by frequency"} -{"id": 49231, "token": "\u0120Shelby", "merges": "\u0120Shel by", "count": 158, "type": "remove by frequency"} -{"id": 44206, "token": "\u00cf\u0123\u00ce\u00b7", "merges": "\u00cf\u0123 \u00ce\u00b7", "count": 158, "type": "remove by frequency"} -{"id": 35962, "token": "\u0120microenvironment", "merges": "\u0120micro environment", "count": 159, "type": "remove by frequency"} -{"id": 49105, "token": "\u0120antifungal", "merges": "\u0120antif ungal", "count": 159, "type": "remove by frequency"} -{"id": 40369, "token": "\u0120integrable", "merges": "\u0120integr able", "count": 159, "type": "remove by frequency"} -{"id": 50033, "token": "\u0120Natalie", "merges": "\u0120Natal ie", "count": 159, "type": "remove by frequency"} -{"id": 44744, "token": "\u0120Indies", "merges": "\u0120Ind ies", "count": 159, "type": "remove by frequency"} -{"id": 46634, "token": "\u0120Siber", "merges": "\u0120S iber", "count": 159, "type": "remove by frequency"} -{"id": 42454, "token": "\u0120v\u00c3\u00a4l", "merges": "\u0120v \u00c3\u00a4l", "count": 159, "type": "remove by frequency"} -{"id": 48739, "token": "\u0120Auss", "merges": "\u0120A uss", "count": 159, "type": "remove by frequency"} -{"id": 29074, "token": "it\u00c3\u00a4", "merges": "it \u00c3\u00a4", "count": 159, "type": "remove by frequency"} -{"id": 42405, "token": "\u0120pathophysiology", "merges": "\u0120pathophys iology", "count": 160, "type": "remove by frequency"} -{"id": 30348, "token": "\u0120hippocampal", "merges": "\u0120hippocamp al", "count": 160, "type": "remove by frequency"} -{"id": 34937, "token": "Throughout", "merges": "Through out", "count": 160, "type": "remove by frequency"} -{"id": 10158, "token": "\u0120Attorney", "merges": "\u0120Att orney", "count": 160, "type": "remove by frequency"} -{"id": 40003, "token": "\u0120Dayton", "merges": "\u0120Day ton", "count": 160, "type": "remove by frequency"} -{"id": 19897, "token": "\u0120Deputy", "merges": "\u0120Dep uty", "count": 160, "type": "remove by frequency"} -{"id": 48646, "token": "\u0120ADVIS", "merges": "\u0120AD VIS", "count": 160, "type": "remove by frequency"} -{"id": 26820, "token": "\u0120eosin", "merges": "\u0120e osin", "count": 160, "type": "remove by frequency"} -{"id": 37833, "token": "\u00ce\u00bf\u00cf\u0127\u00cf\u0124", "merges": "\u00ce\u00bf\u00cf\u0127 \u00cf\u0124", "count": 160, "type": "remove by frequency"} -{"id": 24118, "token": "\u0120Eqs", "merges": "\u0120Eq s", "count": 160, "type": "remove by frequency"} -{"id": 45119, "token": "\u0120\u00c3\u00a9galement", "merges": "\u0120\u00c3\u00a9gal ement", "count": 161, "type": "remove by frequency"} -{"id": 42457, "token": "\u0120Leicester", "merges": "\u0120Le icester", "count": 161, "type": "remove by frequency"} -{"id": 30154, "token": "\u0120Orthodox", "merges": "\u0120Orth odox", "count": 161, "type": "remove by frequency"} -{"id": 18267, "token": "\u0120Atlanta", "merges": "\u0120Atl anta", "count": 161, "type": "remove by frequency"} -{"id": 33279, "token": "\u0120hebben", "merges": "\u0120he bben", "count": 161, "type": "remove by frequency"} -{"id": 26334, "token": "\u0120Hindu", "merges": "\u0120Hind u", "count": 161, "type": "remove by frequency"} -{"id": 43813, "token": "\u0120crore", "merges": "\u0120cro re", "count": 161, "type": "remove by frequency"} -{"id": 49266, "token": "\u0120EIGEN", "merges": "\u0120E IGEN", "count": 161, "type": "remove by frequency"} -{"id": 40063, "token": "\u00ce\u00b9\u00cf\u0124", "merges": "\u00ce\u00b9 \u00cf\u0124", "count": 161, "type": "remove by frequency"} -{"id": 10493, "token": "\u0120$|", "merges": "\u0120$ |", "count": 161, "type": "remove by frequency"} -{"id": 12257, "token": "\u0120Nevertheless", "merges": "\u0120Never theless", "count": 162, "type": "remove by frequency"} -{"id": 35258, "token": "\u0120Providence", "merges": "\u0120Prov idence", "count": 162, "type": "remove by frequency"} -{"id": 41123, "token": "aminergic", "merges": "amin ergic", "count": 162, "type": "remove by frequency"} -{"id": 25659, "token": "\u0120Cauc", "merges": "\u0120Ca uc", "count": 162, "type": "remove by frequency"} -{"id": 49711, "token": "\u0120Kush", "merges": "\u0120K ush", "count": 162, "type": "remove by frequency"} -{"id": 31051, "token": "\u0120s\u00c3\u00a4", "merges": "\u0120s \u00c3\u00a4", "count": 162, "type": "remove by frequency"} -{"id": 36283, "token": "\u0120f\u00c3\u00a5", "merges": "\u0120f \u00c3\u00a5", "count": 162, "type": "remove by frequency"} -{"id": 39022, "token": "\u0120immunohistochemical", "merges": "\u0120immunohist ochemical", "count": 163, "type": "remove by frequency"} -{"id": 25574, "token": "\u0120Representatives", "merges": "\u0120Represent atives", "count": 163, "type": "remove by frequency"} -{"id": 44154, "token": "\u0120glucocortic", "merges": "\u0120gluc ocortic", "count": 163, "type": "remove by frequency"} -{"id": 16091, "token": "\u0120Tennessee", "merges": "\u0120Tenn essee", "count": 163, "type": "remove by frequency"} -{"id": 42408, "token": "\u0120Beverly", "merges": "\u0120Bever ly", "count": 163, "type": "remove by frequency"} -{"id": 23506, "token": "\u0120caspase", "merges": "\u0120c aspase", "count": 163, "type": "remove by frequency"} -{"id": 18718, "token": "\u0120Senator", "merges": "\u0120Sen ator", "count": 163, "type": "remove by frequency"} -{"id": 29842, "token": "ottu", "merges": "ott u", "count": 163, "type": "remove by frequency"} -{"id": 36535, "token": "r\u00c3\u00a5", "merges": "r \u00c3\u00a5", "count": 163, "type": "remove by frequency"} -{"id": 28118, "token": "\\][", "merges": "\\] [", "count": 163, "type": "remove by frequency"} -{"id": 50383, "token": "\u00e2\u013b\u00ab", "merges": "\u00e2\u013b \u00ab", "count": 163, "type": "remove by frequency"} -{"id": 38851, "token": "\u00c8\u013di", "merges": "\u00c8\u013d i", "count": 163, "type": "remove by frequency"} -{"id": 8994, "token": "\u0120\\>", "merges": "\u0120\\ >", "count": 163, "type": "remove by frequency"} -{"id": 21828, "token": "]$,", "merges": "] $,", "count": 163, "type": "remove by frequency"} -{"id": 32108, "token": "\u0120malignancy", "merges": "\u0120malign ancy", "count": 164, "type": "remove by frequency"} -{"id": 42923, "token": "\u0120Auckland", "merges": "\u0120A uckland", "count": 164, "type": "remove by frequency"} -{"id": 43614, "token": "\u0120Farmers", "merges": "\u0120Farm ers", "count": 164, "type": "remove by frequency"} -{"id": 32406, "token": "\u0120MAPK", "merges": "\u0120MAP K", "count": 164, "type": "remove by frequency"} -{"id": 43423, "token": "\u0120PKC", "merges": "\u0120PK C", "count": 164, "type": "remove by frequency"} -{"id": 38118, "token": "\u0120FGF", "merges": "\u0120F GF", "count": 164, "type": "remove by frequency"} -{"id": 38395, "token": "\u0120HbA", "merges": "\u0120Hb A", "count": 164, "type": "remove by frequency"} -{"id": 28653, "token": "\\];", "merges": "\\] ;", "count": 164, "type": "remove by frequency"} -{"id": 40892, "token": "\u0120Copenhagen", "merges": "\u0120C openhagen", "count": 165, "type": "remove by frequency"} -{"id": 6796, "token": "widetilde", "merges": "widet ilde", "count": 165, "type": "remove by frequency"} -{"id": 48922, "token": "\u0120Raleigh", "merges": "\u0120R aleigh", "count": 165, "type": "remove by frequency"} -{"id": 45338, "token": "\u0120axonal", "merges": "\u0120ax onal", "count": 165, "type": "remove by frequency"} -{"id": 39003, "token": "\u0120Mitch", "merges": "\u0120M itch", "count": 165, "type": "remove by frequency"} -{"id": 49177, "token": "\u0120Elvis", "merges": "\u0120El vis", "count": 165, "type": "remove by frequency"} -{"id": 40281, "token": "\u0120acyl", "merges": "\u0120ac yl", "count": 165, "type": "remove by frequency"} -{"id": 42518, "token": "\u0120Rica", "merges": "\u0120R ica", "count": 165, "type": "remove by frequency"} -{"id": 31807, "token": "')$", "merges": "' )$", "count": 165, "type": "remove by frequency"} -{"id": 30696, "token": "\u0120Regarding", "merges": "\u0120Reg arding", "count": 166, "type": "remove by frequency"} -{"id": 33028, "token": "leftarrow", "merges": "left arrow", "count": 166, "type": "remove by frequency"} -{"id": 43905, "token": "\u0120Bulgaria", "merges": "\u0120Bulgar ia", "count": 166, "type": "remove by frequency"} -{"id": 31397, "token": "\u0120Trustee", "merges": "\u0120Trust ee", "count": 166, "type": "remove by frequency"} -{"id": 30449, "token": "\u0120Eagles", "merges": "\u0120E agles", "count": 166, "type": "remove by frequency"} -{"id": 40146, "token": "\u0120\u00cf\u0122\u00ce\u00b5", "merges": "\u0120\u00cf\u0122 \u00ce\u00b5", "count": 166, "type": "remove by frequency"} -{"id": 29007, "token": "\u0120NPs", "merges": "\u0120N Ps", "count": 166, "type": "remove by frequency"} -{"id": 49095, "token": "\u0120EVs", "merges": "\u0120EV s", "count": 166, "type": "remove by frequency"} -{"id": 50360, "token": "\u00e2\u0138\u012d", "merges": "\u00e2\u0138 \u012d", "count": 166, "type": "remove by frequency"} -{"id": 50321, "token": "\u00e2\u012a\u00a3", "merges": "\u00e2\u012a \u00a3", "count": 166, "type": "remove by frequency"} -{"id": 44688, "token": "findViewById", "merges": "find ViewById", "count": 167, "type": "remove by frequency"} -{"id": 29171, "token": "\u0120Hungary", "merges": "\u0120Hung ary", "count": 167, "type": "remove by frequency"} -{"id": 49929, "token": "\u0120\u00ce\u00b5\u00cf\u0122\u00ce\u00b9", "merges": "\u0120\u00ce\u00b5\u00cf\u0122 \u00ce\u00b9", "count": 167, "type": "remove by frequency"} -{"id": 35020, "token": "\u0120Nepal", "merges": "\u0120Nep al", "count": 167, "type": "remove by frequency"} -{"id": 41180, "token": "\u0120superoxide", "merges": "\u0120super oxide", "count": 168, "type": "remove by frequency"} -{"id": 37497, "token": "\u0120Katherine", "merges": "\u0120K atherine", "count": 168, "type": "remove by frequency"} -{"id": 40282, "token": "\u0120Adelaide", "merges": "\u0120Ad elaide", "count": 168, "type": "remove by frequency"} -{"id": 46543, "token": "\u0120blots", "merges": "\u0120bl ots", "count": 168, "type": "remove by frequency"} -{"id": 36935, "token": "\u0120Filip", "merges": "\u0120Fil ip", "count": 168, "type": "remove by frequency"} -{"id": 49557, "token": "\u0120Ricky", "merges": "\u0120Rick y", "count": 168, "type": "remove by frequency"} -{"id": 49567, "token": "\u0120Peggy", "merges": "\u0120Peg gy", "count": 168, "type": "remove by frequency"} -{"id": 50167, "token": "\u0120Dors", "merges": "\u0120D ors", "count": 168, "type": "remove by frequency"} -{"id": 33148, "token": "\u0120lysis", "merges": "\u0120l ysis", "count": 169, "type": "remove by frequency"} -{"id": 49954, "token": "\u0120Sixty", "merges": "\u0120Six ty", "count": 169, "type": "remove by frequency"} -{"id": 39318, "token": "\u010a\u00c2\u0142\u00c2\u0142", "merges": "\u010a \u00c2\u0142\u00c2\u0142", "count": 169, "type": "remove by frequency"} -{"id": 49100, "token": "\u00cf\u0123\u00cf\u0130", "merges": "\u00cf\u0123 \u00cf\u0130", "count": 169, "type": "remove by frequency"} -{"id": 45728, "token": "\u0120DHS", "merges": "\u0120D HS", "count": 169, "type": "remove by frequency"} -{"id": 43666, "token": "\u0120\u00ce\u013c", "merges": "\u0120\u00ce \u013c", "count": 169, "type": "remove by frequency"} -{"id": 31053, "token": "].)", "merges": "]. )", "count": 169, "type": "remove by frequency"} -{"id": 28866, "token": "\u0120Immigration", "merges": "\u0120Imm igration", "count": 170, "type": "remove by frequency"} -{"id": 10729, "token": "\u0120incubated", "merges": "\u0120incub ated", "count": 170, "type": "remove by frequency"} -{"id": 44677, "token": "\u0120Cornwall", "merges": "\u0120Corn wall", "count": 170, "type": "remove by frequency"} -{"id": 48882, "token": "\u0120Freddie", "merges": "\u0120Fred die", "count": 170, "type": "remove by frequency"} -{"id": 48838, "token": "\u0120Epidem", "merges": "\u0120Epid em", "count": 170, "type": "remove by frequency"} -{"id": 35251, "token": "\u0120Boeing", "merges": "\u0120Bo eing", "count": 170, "type": "remove by frequency"} -{"id": 31642, "token": "\u0120Lanka", "merges": "\u0120L anka", "count": 170, "type": "remove by frequency"} -{"id": 23199, "token": "\u0120VEGF", "merges": "\u0120VE GF", "count": 170, "type": "remove by frequency"} -{"id": 35493, "token": "aient", "merges": "a ient", "count": 170, "type": "remove by frequency"} -{"id": 44027, "token": "\u0120microbiome", "merges": "\u0120microbi ome", "count": 171, "type": "remove by frequency"} -{"id": 31402, "token": "\u0120virulence", "merges": "\u0120vir ulence", "count": 171, "type": "remove by frequency"} -{"id": 46665, "token": "\u0120Ethical", "merges": "\u0120Eth ical", "count": 171, "type": "remove by frequency"} -{"id": 46249, "token": "\u0120v\u00c3\u00a6re", "merges": "\u0120v\u00c3\u00a6 re", "count": 171, "type": "remove by frequency"} -{"id": 48038, "token": "\u0120Cory", "merges": "\u0120C ory", "count": 171, "type": "remove by frequency"} -{"id": 47568, "token": "\u0120\u00e0\u00b2", "merges": "\u0120 \u00e0\u00b2", "count": 171, "type": "remove by frequency"} -{"id": 30536, "token": "Rightarrow", "merges": "Right arrow", "count": 172, "type": "remove by frequency"} -{"id": 36480, "token": "\u0120Superman", "merges": "\u0120Super man", "count": 172, "type": "remove by frequency"} -{"id": 45500, "token": "\u0120Acting", "merges": "\u0120Act ing", "count": 172, "type": "remove by frequency"} -{"id": 50168, "token": "\u0120Slav", "merges": "\u0120Sl av", "count": 172, "type": "remove by frequency"} -{"id": 40647, "token": "\u0120\u00cf\u0125\u00ce\u00b5", "merges": "\u0120\u00cf\u0125 \u00ce\u00b5", "count": 172, "type": "remove by frequency"} -{"id": 35830, "token": "]_{", "merges": "] _{", "count": 172, "type": "remove by frequency"} -{"id": 40527, "token": "\u0120\\:", "merges": "\u0120\\ :", "count": 172, "type": "remove by frequency"} -{"id": 18531, "token": "\u0120Mississippi", "merges": "\u0120Miss issippi", "count": 173, "type": "remove by frequency"} -{"id": 48587, "token": "\u0120Seriously", "merges": "\u0120Ser iously", "count": 173, "type": "remove by frequency"} -{"id": 31003, "token": "\u0120Colombia", "merges": "\u0120Colomb ia", "count": 173, "type": "remove by frequency"} -{"id": 18175, "token": "\u0120Hillary", "merges": "\u0120Hill ary", "count": 173, "type": "remove by frequency"} -{"id": 46287, "token": "\u0120Buenos", "merges": "\u0120Bu enos", "count": 173, "type": "remove by frequency"} -{"id": 49389, "token": "\u0120Himal", "merges": "\u0120H imal", "count": 173, "type": "remove by frequency"} -{"id": 40771, "token": "\u0120Shane", "merges": "\u0120Sh ane", "count": 173, "type": "remove by frequency"} -{"id": 50378, "token": "\u00e2\u0139\u00a4", "merges": "\u00e2\u0139 \u00a4", "count": 173, "type": "remove by frequency"} -{"id": 19135, "token": "}^*", "merges": "}^ *", "count": 173, "type": "remove by frequency"} -{"id": 47206, "token": "_{*", "merges": "_{ *", "count": 173, "type": "remove by frequency"} -{"id": 23022, "token": "\u0120\u00c3\u013a", "merges": "\u0120\u00c3 \u013a", "count": 173, "type": "remove by frequency"} -{"id": 21070, "token": "\u00c4\u0122", "merges": "\u00c4 \u0122", "count": 173, "type": "remove by frequency"} -{"id": 28354, "token": "\u0120\u00cf\u0122\u00cf\u0123\u00ce\u00bf", "merges": "\u0120\u00cf\u0122 \u00cf\u0123\u00ce\u00bf", "count": 174, "type": "remove by frequency"} -{"id": 41792, "token": "\u0120glioma", "merges": "\u0120gli oma", "count": 174, "type": "remove by frequency"} -{"id": 47832, "token": "\u0120Malta", "merges": "\u0120Mal ta", "count": 174, "type": "remove by frequency"} -{"id": 35943, "token": "\u0120cAMP", "merges": "\u0120c AMP", "count": 174, "type": "remove by frequency"} -{"id": 34500, "token": "\u00ce\u0143\u00cf\u0124", "merges": "\u00ce\u0143 \u00cf\u0124", "count": 174, "type": "remove by frequency"} -{"id": 41009, "token": "\u0120McConnell", "merges": "\u0120Mc Connell", "count": 175, "type": "remove by frequency"} -{"id": 34159, "token": "\u0120Helsinki", "merges": "\u0120Hels inki", "count": 175, "type": "remove by frequency"} -{"id": 40607, "token": "\u0120Cochrane", "merges": "\u0120Co chrane", "count": 175, "type": "remove by frequency"} -{"id": 47137, "token": "\u0120Valencia", "merges": "\u0120Val encia", "count": 175, "type": "remove by frequency"} -{"id": 34248, "token": "\u0120serine", "merges": "\u0120ser ine", "count": 175, "type": "remove by frequency"} -{"id": 39718, "token": "\u0120Acute", "merges": "\u0120Ac ute", "count": 175, "type": "remove by frequency"} -{"id": 46620, "token": "\u0120sehr", "merges": "\u0120se hr", "count": 175, "type": "remove by frequency"} -{"id": 42313, "token": "\u00ce\u00ac\u00ce\u00bd", "merges": "\u00ce\u00ac \u00ce\u00bd", "count": 175, "type": "remove by frequency"} -{"id": 36225, "token": "\u0120HEK", "merges": "\u0120HE K", "count": 175, "type": "remove by frequency"} -{"id": 46194, "token": "\u0120Gus", "merges": "\u0120G us", "count": 175, "type": "remove by frequency"} -{"id": 17372, "token": "\u0120Detroit", "merges": "\u0120Det roit", "count": 176, "type": "remove by frequency"} -{"id": 37063, "token": "\u0120Bengal", "merges": "\u0120Beng al", "count": 176, "type": "remove by frequency"} -{"id": 33147, "token": "oblot", "merges": "ob lot", "count": 176, "type": "remove by frequency"} -{"id": 38093, "token": "\u00cf\u0123\u00cf\u012b", "merges": "\u00cf\u0123 \u00cf\u012b", "count": 176, "type": "remove by frequency"} -{"id": 47858, "token": "\u0120Taj", "merges": "\u0120T aj", "count": 176, "type": "remove by frequency"} -{"id": 45299, "token": "\u00c3\u00b8re", "merges": "\u00c3\u00b8 re", "count": 176, "type": "remove by frequency"} -{"id": 33354, "token": "\\|^", "merges": "\\| ^", "count": 176, "type": "remove by frequency"} -{"id": 34038, "token": "Decided", "merges": "Dec ided", "count": 177, "type": "remove by frequency"} -{"id": 26642, "token": "\u0120Martha", "merges": "\u0120Mar tha", "count": 177, "type": "remove by frequency"} -{"id": 36180, "token": "\u0120Latino", "merges": "\u0120Lat ino", "count": 177, "type": "remove by frequency"} -{"id": 25287, "token": "\u0120Quebec", "merges": "\u0120Que bec", "count": 177, "type": "remove by frequency"} -{"id": 36758, "token": "pdev", "merges": "p dev", "count": 177, "type": "remove by frequency"} -{"id": 1764, "token": "\u0120$$\\", "merges": "\u0120$ $\\", "count": 177, "type": "remove by frequency"} -{"id": 39242, "token": "\u00ce\u00b1\u00cf\u0124", "merges": "\u00ce\u00b1 \u00cf\u0124", "count": 177, "type": "remove by frequency"} -{"id": 33809, "token": "\u0120transmembrane", "merges": "\u0120trans membrane", "count": 178, "type": "remove by frequency"} -{"id": 34956, "token": "\u0120cytochrome", "merges": "\u0120cy tochrome", "count": 178, "type": "remove by frequency"} -{"id": 38321, "token": "\u0120Monsieur", "merges": "\u0120Mons ieur", "count": 178, "type": "remove by frequency"} -{"id": 21737, "token": "\u0120Licensed", "merges": "\u0120Lic ensed", "count": 178, "type": "remove by frequency"} -{"id": 47969, "token": "raisebox", "merges": "raise box", "count": 178, "type": "remove by frequency"} -{"id": 30979, "token": "\u0120Madame", "merges": "\u0120Mad ame", "count": 178, "type": "remove by frequency"} -{"id": 34829, "token": "\u0120Biosc", "merges": "\u0120Bios c", "count": 178, "type": "remove by frequency"} -{"id": 26563, "token": "\u0120Cinc", "merges": "\u0120C inc", "count": 178, "type": "remove by frequency"} -{"id": 45169, "token": "\u0120immunosuppressive", "merges": "\u0120immunosupp ressive", "count": 179, "type": "remove by frequency"} -{"id": 48766, "token": "\u0120thermoplastic", "merges": "\u0120therm oplastic", "count": 179, "type": "remove by frequency"} -{"id": 35563, "token": "\u0120carcinomas", "merges": "\u0120carcin omas", "count": 179, "type": "remove by frequency"} -{"id": 42346, "token": "\u0120offseason", "merges": "\u0120off season", "count": 179, "type": "remove by frequency"} -{"id": 42323, "token": "\u0120secretory", "merges": "\u0120secret ory", "count": 179, "type": "remove by frequency"} -{"id": 48509, "token": "\u0120telomere", "merges": "\u0120telome re", "count": 179, "type": "remove by frequency"} -{"id": 35732, "token": "\u0120Estados", "merges": "\u0120Est ados", "count": 179, "type": "remove by frequency"} -{"id": 36718, "token": "textstyle", "merges": "text style", "count": 180, "type": "remove by frequency"} -{"id": 48432, "token": "\u0120Nissan", "merges": "\u0120Niss an", "count": 180, "type": "remove by frequency"} -{"id": 47203, "token": "\u0120\u00c3\u00a4n", "merges": "\u0120\u00c3\u00a4 n", "count": 180, "type": "remove by frequency"} -{"id": 45162, "token": "\u0120UAE", "merges": "\u0120U AE", "count": 180, "type": "remove by frequency"} -{"id": 21846, "token": "\u0120\\!", "merges": "\u0120\\ !", "count": 180, "type": "remove by frequency"} -{"id": 18304, "token": "\u0120transcriptional", "merges": "\u0120transcript ional", "count": 181, "type": "remove by frequency"} -{"id": 42561, "token": "\u0120radiographic", "merges": "\u0120radi ographic", "count": 181, "type": "remove by frequency"} -{"id": 41418, "token": "\u0120glomerular", "merges": "\u0120glomer ular", "count": 181, "type": "remove by frequency"} -{"id": 41120, "token": "\u0120Esther", "merges": "\u0120Est her", "count": 181, "type": "remove by frequency"} -{"id": 23401, "token": "\u00e2\u0122\u0125\u00e2\u0122\u0125", "merges": "\u00e2\u0122\u0125 \u00e2\u0122\u0125", "count": 181, "type": "remove by frequency"} -{"id": 29630, "token": "\u0120Welsh", "merges": "\u0120Wel sh", "count": 181, "type": "remove by frequency"} -{"id": 26855, "token": "\u0120\u00cf\u0125\u00cf\u0127", "merges": "\u0120\u00cf\u0125 \u00cf\u0127", "count": 181, "type": "remove by frequency"} -{"id": 40287, "token": "\u0120pr\u00c3\u00a1", "merges": "\u0120pr \u00c3\u00a1", "count": 181, "type": "remove by frequency"} -{"id": 30096, "token": "f\u00c3\u00b6r", "merges": "f \u00c3\u00b6r", "count": 181, "type": "remove by frequency"} -{"id": 50284, "token": "\u00cb\u012c", "merges": "\u00cb \u012c", "count": 181, "type": "remove by frequency"} -{"id": 34421, "token": "\u0120Yorkshire", "merges": "\u0120Y orkshire", "count": 182, "type": "remove by frequency"} -{"id": 32683, "token": "\u0120Allied", "merges": "\u0120All ied", "count": 182, "type": "remove by frequency"} -{"id": 21938, "token": "\u0120[\u00e2\u0122\u00a6]", "merges": "\u0120[ \u00e2\u0122\u00a6]", "count": 182, "type": "remove by frequency"} -{"id": 48295, "token": "Lastly", "merges": "Last ly", "count": 182, "type": "remove by frequency"} -{"id": 37919, "token": "\u0120gyrus", "merges": "\u0120g yrus", "count": 182, "type": "remove by frequency"} -{"id": 36684, "token": "\u0120\u00ce\u00b1\u00ce\u00bd", "merges": "\u0120\u00ce\u00b1 \u00ce\u00bd", "count": 182, "type": "remove by frequency"} -{"id": 43047, "token": "\u0120Tuc", "merges": "\u0120T uc", "count": 182, "type": "remove by frequency"} -{"id": 41302, "token": "},$", "merges": "}, $", "count": 182, "type": "remove by frequency"} -{"id": 30252, "token": "\u0120parlament", "merges": "\u0120par lament", "count": 183, "type": "remove by frequency"} -{"id": 17960, "token": "\u0120Scottish", "merges": "\u0120Scott ish", "count": 183, "type": "remove by frequency"} -{"id": 35278, "token": "\u0120\u00ce\u00b1\u00cf\u0122\u00cf\u012e", "merges": "\u0120\u00ce\u00b1 \u00cf\u0122\u00cf\u012e", "count": 183, "type": "remove by frequency"} -{"id": 26656, "token": "\u0120Biden", "merges": "\u0120B iden", "count": 183, "type": "remove by frequency"} -{"id": 48835, "token": "\u0120arbe", "merges": "\u0120ar be", "count": 183, "type": "remove by frequency"} -{"id": 44538, "token": "\u0120IHC", "merges": "\u0120I HC", "count": 183, "type": "remove by frequency"} -{"id": 28748, "token": "\u0120Queensland", "merges": "\u0120Queens land", "count": 184, "type": "remove by frequency"} -{"id": 30907, "token": "\u0120Vietnamese", "merges": "\u0120Viet namese", "count": 184, "type": "remove by frequency"} -{"id": 37708, "token": "\u0120Americas", "merges": "\u0120Americ as", "count": 184, "type": "remove by frequency"} -{"id": 48816, "token": "\u0120Hercules", "merges": "\u0120Her cules", "count": 184, "type": "remove by frequency"} -{"id": 31863, "token": "\u0120spermat", "merges": "\u0120sper mat", "count": 184, "type": "remove by frequency"} -{"id": 47770, "token": "\u0120Severe", "merges": "\u0120Se vere", "count": 184, "type": "remove by frequency"} -{"id": 32156, "token": "^*_", "merges": "^* _", "count": 184, "type": "remove by frequency"} -{"id": 37055, "token": "'}$", "merges": "' }$", "count": 184, "type": "remove by frequency"} -{"id": 18292, "token": "\u0120Supplementary", "merges": "\u0120Supp lementary", "count": 185, "type": "remove by frequency"} -{"id": 41130, "token": "\u0120ferromagnetic", "merges": "\u0120fer romagnetic", "count": 185, "type": "remove by frequency"} -{"id": 14723, "token": "\u0120Invention", "merges": "\u0120In vention", "count": 185, "type": "remove by frequency"} -{"id": 44203, "token": "\u0120parietal", "merges": "\u0120par ietal", "count": 185, "type": "remove by frequency"} -{"id": 28624, "token": "\u0120agonist", "merges": "\u0120agon ist", "count": 185, "type": "remove by frequency"} -{"id": 47252, "token": "\u0120intrav", "merges": "\u0120int rav", "count": 185, "type": "remove by frequency"} -{"id": 28364, "token": "\u0120Satan", "merges": "\u0120Sat an", "count": 185, "type": "remove by frequency"} -{"id": 32874, "token": "\u0120Kerry", "merges": "\u0120K erry", "count": 185, "type": "remove by frequency"} -{"id": 44988, "token": "NFTA", "merges": "N FTA", "count": 185, "type": "remove by frequency"} -{"id": 38006, "token": "\\~", "merges": "\\ ~", "count": 185, "type": "remove by frequency"} -{"id": 45273, "token": "\u0120Scientists", "merges": "\u0120Scient ists", "count": 186, "type": "remove by frequency"} -{"id": 37134, "token": "\u0120\u00ce\u00b5\u00ce\u00af\u00ce\u00bd\u00ce\u00b1\u00ce\u00b9", "merges": "\u0120\u00ce\u00b5 \u00ce\u00af\u00ce\u00bd\u00ce\u00b1\u00ce\u00b9", "count": 186, "type": "remove by frequency"} -{"id": 47857, "token": "\u0120Antarctic", "merges": "\u0120Antar ctic", "count": 186, "type": "remove by frequency"} -{"id": 23856, "token": "\u0120Nigeria", "merges": "\u0120Nig eria", "count": 186, "type": "remove by frequency"} -{"id": 46389, "token": "\u0120Judith", "merges": "\u0120Jud ith", "count": 186, "type": "remove by frequency"} -{"id": 46600, "token": "\u0120Shawn", "merges": "\u0120Sh awn", "count": 186, "type": "remove by frequency"} -{"id": 35274, "token": "\u0120Aristotle", "merges": "\u0120Arist otle", "count": 187, "type": "remove by frequency"} -{"id": 48067, "token": "\u00c3\u00bcssen", "merges": "\u00c3\u00bcss en", "count": 187, "type": "remove by frequency"} -{"id": 27946, "token": "\u0120Kenya", "merges": "\u0120Ken ya", "count": 187, "type": "remove by frequency"} -{"id": 29691, "token": "\u0120centrifugation", "merges": "\u0120centrifug ation", "count": 188, "type": "remove by frequency"} -{"id": 20386, "token": "CHANTABILITY", "merges": "CHANT ABILITY", "count": 188, "type": "remove by frequency"} -{"id": 45889, "token": "\u0120mutagenesis", "merges": "\u0120mutagen esis", "count": 188, "type": "remove by frequency"} -{"id": 43446, "token": "\u0120COUNTY", "merges": "\u0120COUNT Y", "count": 188, "type": "remove by frequency"} -{"id": 25976, "token": "..\\..\\", "merges": "..\\ ..\\", "count": 188, "type": "remove by frequency"} -{"id": 12523, "token": "}}(\\", "merges": "}} (\\", "count": 188, "type": "remove by frequency"} -{"id": 34465, "token": "}=(", "merges": "}= (", "count": 188, "type": "remove by frequency"} -{"id": 44347, "token": "\u0120Conventional", "merges": "\u0120Convention al", "count": 189, "type": "remove by frequency"} -{"id": 30577, "token": "\u0120phosphatase", "merges": "\u0120phosphat ase", "count": 189, "type": "remove by frequency"} -{"id": 49466, "token": "\u0120INCIDENT", "merges": "\u0120INC IDENT", "count": 189, "type": "remove by frequency"} -{"id": 24213, "token": "propto", "merges": "pro pto", "count": 189, "type": "remove by frequency"} -{"id": 35593, "token": "\\*\\*\\*", "merges": "\\*\\* \\*", "count": 189, "type": "remove by frequency"} -{"id": 35436, "token": "\u0120Donna", "merges": "\u0120Don na", "count": 189, "type": "remove by frequency"} -{"id": 48076, "token": "\u0120Elsa", "merges": "\u0120El sa", "count": 189, "type": "remove by frequency"} -{"id": 47741, "token": "\u0120RPE", "merges": "\u0120R PE", "count": 189, "type": "remove by frequency"} -{"id": 24036, "token": "\u0120\u00cf\u012e", "merges": "\u0120 \u00cf\u012e", "count": 189, "type": "remove by frequency"} -{"id": 41146, "token": "\u0120senescence", "merges": "\u0120sen escence", "count": 190, "type": "remove by frequency"} -{"id": 33537, "token": "\u0120biopsies", "merges": "\u0120bi opsies", "count": 190, "type": "remove by frequency"} -{"id": 37642, "token": "\u0120Congo", "merges": "\u0120Cong o", "count": 190, "type": "remove by frequency"} -{"id": 49015, "token": "\u0120Sey", "merges": "\u0120Se y", "count": 190, "type": "remove by frequency"} -{"id": 48155, "token": "ss\u00c3\u00a4", "merges": "ss \u00c3\u00a4", "count": 190, "type": "remove by frequency"} -{"id": 33401, "token": "\u0120\\|\\", "merges": "\u0120\\ |\\", "count": 190, "type": "remove by frequency"} -{"id": 34241, "token": "\u0120conformational", "merges": "\u0120conform ational", "count": 191, "type": "remove by frequency"} -{"id": 43622, "token": "\u0120Differentiate", "merges": "\u0120Different iate", "count": 191, "type": "remove by frequency"} -{"id": 42831, "token": "microorganisms", "merges": "micro organisms", "count": 191, "type": "remove by frequency"} -{"id": 38702, "token": "\u0120micrograms", "merges": "\u0120micro grams", "count": 191, "type": "remove by frequency"} -{"id": 34596, "token": "\u0120peroxidase", "merges": "\u0120perox idase", "count": 191, "type": "remove by frequency"} -{"id": 37121, "token": "\u0120adenosine", "merges": "\u0120aden osine", "count": 191, "type": "remove by frequency"} -{"id": 40431, "token": "\u0120Warsaw", "merges": "\u0120Wars aw", "count": 191, "type": "remove by frequency"} -{"id": 41638, "token": "\u00c3\u00a4hler", "merges": "\u00c3\u00a4h ler", "count": 191, "type": "remove by frequency"} -{"id": 44965, "token": "\u0120[[*", "merges": "\u0120[[ *", "count": 191, "type": "remove by frequency"} -{"id": 32507, "token": "\u0120AGN", "merges": "\u0120AG N", "count": 191, "type": "remove by frequency"} -{"id": 49233, "token": "\u00e0\u00bd", "merges": "\u00e0 \u00bd", "count": 191, "type": "remove by frequency"} -{"id": 16257, "token": "\u0120Meanwhile", "merges": "\u0120Mean while", "count": 192, "type": "remove by frequency"} -{"id": 47922, "token": "\u0120Brewing", "merges": "\u0120Brew ing", "count": 192, "type": "remove by frequency"} -{"id": 41592, "token": "\u0120Tibetan", "merges": "\u0120Tibet an", "count": 192, "type": "remove by frequency"} -{"id": 34454, "token": "\u0120)$", "merges": "\u0120 )$", "count": 192, "type": "remove by frequency"} -{"id": 34862, "token": "\u0120triplicate", "merges": "\u0120tri plicate", "count": 193, "type": "remove by frequency"} -{"id": 44542, "token": "\u0120Ethan", "merges": "\u0120Eth an", "count": 193, "type": "remove by frequency"} -{"id": 37073, "token": "\u0120Katie", "merges": "\u0120Kat ie", "count": 193, "type": "remove by frequency"} -{"id": 41896, "token": "\u0120j\u00c3\u00a4", "merges": "\u0120j \u00c3\u00a4", "count": 193, "type": "remove by frequency"} -{"id": 30793, "token": "\u00ce\u00bf\u00ce\u00b9", "merges": "\u00ce\u00bf \u00ce\u00b9", "count": 193, "type": "remove by frequency"} -{"id": 19585, "token": "\u0120Baltimore", "merges": "\u0120Balt imore", "count": 194, "type": "remove by frequency"} -{"id": 31877, "token": "\u0120INCLUDING", "merges": "\u0120IN CLUDING", "count": 194, "type": "remove by frequency"} -{"id": 44435, "token": "\u0120Colonial", "merges": "\u0120Col onial", "count": 194, "type": "remove by frequency"} -{"id": 35245, "token": "\u00ce\u00af\u00ce\u00bd\u00ce\u00b1\u00ce\u00b9", "merges": "\u00ce\u00af\u00ce\u00bd \u00ce\u00b1\u00ce\u00b9", "count": 194, "type": "remove by frequency"} -{"id": 38994, "token": "\u0120Melissa", "merges": "\u0120Mel issa", "count": 194, "type": "remove by frequency"} -{"id": 33417, "token": "\u0120Courts", "merges": "\u0120Court s", "count": 194, "type": "remove by frequency"} -{"id": 45526, "token": "\u0120Bulls", "merges": "\u0120Bull s", "count": 194, "type": "remove by frequency"} -{"id": 32221, "token": "\u0120SHALL", "merges": "\u0120SH ALL", "count": 194, "type": "remove by frequency"} -{"id": 49671, "token": "\u0120Mild", "merges": "\u0120M ild", "count": 194, "type": "remove by frequency"} -{"id": 46292, "token": "\u0120v\u00c3\u00bd", "merges": "\u0120v \u00c3\u00bd", "count": 194, "type": "remove by frequency"} -{"id": 45530, "token": "\u0120Osw", "merges": "\u0120O sw", "count": 194, "type": "remove by frequency"} -{"id": 32400, "token": "WKH", "merges": "W KH", "count": 194, "type": "remove by frequency"} -{"id": 49020, "token": "\u0120triglyceride", "merges": "\u0120triglycer ide", "count": 195, "type": "remove by frequency"} -{"id": 23273, "token": "\u0120fibroblasts", "merges": "\u0120fibrobl asts", "count": 195, "type": "remove by frequency"} -{"id": 35836, "token": "\u0120zebrafish", "merges": "\u0120zebra fish", "count": 195, "type": "remove by frequency"} -{"id": 27687, "token": "\u0120Orlando", "merges": "\u0120Or lando", "count": 195, "type": "remove by frequency"} -{"id": 38012, "token": "\u0120MNRAS", "merges": "\u0120M NRAS", "count": 195, "type": "remove by frequency"} -{"id": 23222, "token": "\u0120Conservative", "merges": "\u0120Conserv ative", "count": 196, "type": "remove by frequency"} -{"id": 39044, "token": "\u0120fibroblast", "merges": "\u0120fibrobl ast", "count": 196, "type": "remove by frequency"} -{"id": 49846, "token": "\u0120heterozyg", "merges": "\u0120heter ozyg", "count": 196, "type": "remove by frequency"} -{"id": 39528, "token": "\u0120Rochester", "merges": "\u0120Roche ster", "count": 196, "type": "remove by frequency"} -{"id": 46761, "token": "\u0120albicans", "merges": "\u0120alb icans", "count": 196, "type": "remove by frequency"} -{"id": 18984, "token": "\u0120ovarian", "merges": "\u0120ovar ian", "count": 196, "type": "remove by frequency"} -{"id": 47144, "token": "\u0120GOODS", "merges": "\u0120GOOD S", "count": 196, "type": "remove by frequency"} -{"id": 37938, "token": "\u0120loro", "merges": "\u0120l oro", "count": 196, "type": "remove by frequency"} -{"id": 24110, "token": "\u0120Brazilian", "merges": "\u0120Brazil ian", "count": 197, "type": "remove by frequency"} -{"id": 45208, "token": "\u0120plasmon", "merges": "\u0120plas mon", "count": 197, "type": "remove by frequency"} -{"id": 48507, "token": "\u0120splen", "merges": "\u0120spl en", "count": 197, "type": "remove by frequency"} -{"id": 30065, "token": "\u0120Celt", "merges": "\u0120C elt", "count": 197, "type": "remove by frequency"} -{"id": 43145, "token": "hljs", "merges": "hl js", "count": 197, "type": "remove by frequency"} -{"id": 40406, "token": "\u00c3\u00a5ng", "merges": "\u00c3\u00a5 ng", "count": 197, "type": "remove by frequency"} -{"id": 39093, "token": "\u0120emulsion", "merges": "\u0120em ulsion", "count": 198, "type": "remove by frequency"} -{"id": 49512, "token": "\u0120Inhib", "merges": "\u0120In hib", "count": 198, "type": "remove by frequency"} -{"id": 46797, "token": "\u0120Chick", "merges": "\u0120Ch ick", "count": 198, "type": "remove by frequency"} -{"id": 41951, "token": "\u0120Kyoto", "merges": "\u0120Ky oto", "count": 198, "type": "remove by frequency"} -{"id": 46286, "token": "\u0120phenomenological", "merges": "\u0120phenomen ological", "count": 199, "type": "remove by frequency"} -{"id": 21609, "token": "\u0120Mediterranean", "merges": "\u0120Med iterranean", "count": 199, "type": "remove by frequency"} -{"id": 24256, "token": "\u0120cytoplasmic", "merges": "\u0120cytoplasm ic", "count": 199, "type": "remove by frequency"} -{"id": 37558, "token": "\u0120Legislative", "merges": "\u0120Legisl ative", "count": 199, "type": "remove by frequency"} -{"id": 47139, "token": "\u0120Wrestling", "merges": "\u0120Wrest ling", "count": 199, "type": "remove by frequency"} -{"id": 39853, "token": "\u0120reductase", "merges": "\u0120reduct ase", "count": 199, "type": "remove by frequency"} -{"id": 29530, "token": "\u0120plasmids", "merges": "\u0120plasm ids", "count": 199, "type": "remove by frequency"} -{"id": 26304, "token": "\u0120Danish", "merges": "\u0120Dan ish", "count": 199, "type": "remove by frequency"} -{"id": 37603, "token": "\u0120Slov", "merges": "\u0120Sl ov", "count": 199, "type": "remove by frequency"} -{"id": 31805, "token": "\u0120Peru", "merges": "\u0120Per u", "count": 199, "type": "remove by frequency"} -{"id": 40051, "token": "\u00ce\u00b1\u00cf\u0125", "merges": "\u00ce\u00b1 \u00cf\u0125", "count": 199, "type": "remove by frequency"} -{"id": 45444, "token": "\u0120CDs", "merges": "\u0120CD s", "count": 199, "type": "remove by frequency"} -{"id": 45944, "token": "\u0120glycine", "merges": "\u0120glyc ine", "count": 200, "type": "remove by frequency"} -{"id": 46623, "token": "\u0120baryon", "merges": "\u0120bary on", "count": 200, "type": "remove by frequency"} -{"id": 40914, "token": "textsc", "merges": "text sc", "count": 200, "type": "remove by frequency"} -{"id": 29494, "token": "\u0120ERK", "merges": "\u0120ER K", "count": 200, "type": "remove by frequency"} -{"id": 17180, "token": "|_{", "merges": "| _{", "count": 200, "type": "remove by frequency"} -{"id": 29715, "token": "=-\\", "merges": "= -\\", "count": 200, "type": "remove by frequency"} -{"id": 25984, "token": "&&\\", "merges": "& &\\", "count": 200, "type": "remove by frequency"} -{"id": 42936, "token": "\u0120Stafford", "merges": "\u0120Staff ord", "count": 201, "type": "remove by frequency"} -{"id": 48824, "token": "\u0120Wilcoxon", "merges": "\u0120Wilcox on", "count": 201, "type": "remove by frequency"} -{"id": 9315, "token": "qquad", "merges": "q quad", "count": 201, "type": "remove by frequency"} -{"id": 39672, "token": "\u0120Nous", "merges": "\u0120N ous", "count": 201, "type": "remove by frequency"} -{"id": 50354, "token": "\u00e2\u0137\u00b3", "merges": "\u00e2\u0137 \u00b3", "count": 201, "type": "remove by frequency"} -{"id": 43591, "token": "LRQ", "merges": "LR Q", "count": 201, "type": "remove by frequency"} -{"id": 47769, "token": "\u0120Inhibition", "merges": "\u0120In hibition", "count": 202, "type": "remove by frequency"} -{"id": 47229, "token": "\u0120Emirates", "merges": "\u0120Emir ates", "count": 202, "type": "remove by frequency"} -{"id": 29026, "token": "\u0120Alberta", "merges": "\u0120Alber ta", "count": 202, "type": "remove by frequency"} -{"id": 13376, "token": "\u0120Afghan", "merges": "\u0120Af ghan", "count": 202, "type": "remove by frequency"} -{"id": 44870, "token": "\u0120Trevor", "merges": "\u0120Tre vor", "count": 202, "type": "remove by frequency"} -{"id": 30396, "token": "\u0120Romans", "merges": "\u0120Rom ans", "count": 202, "type": "remove by frequency"} -{"id": 46275, "token": "/](", "merges": "/ ](", "count": 202, "type": "remove by frequency"} -{"id": 44045, "token": "\u0120prospectively", "merges": "\u0120prospect ively", "count": 203, "type": "remove by frequency"} -{"id": 15605, "token": "\u0120Commissioner", "merges": "\u0120Commission er", "count": 203, "type": "remove by frequency"} -{"id": 40856, "token": "\u0120Bachelor", "merges": "\u0120B achelor", "count": 203, "type": "remove by frequency"} -{"id": 37209, "token": "\u0120Officers", "merges": "\u0120Offic ers", "count": 203, "type": "remove by frequency"} -{"id": 40828, "token": "\u0120Knowing", "merges": "\u0120Know ing", "count": 203, "type": "remove by frequency"} -{"id": 49771, "token": "\u0120Leone", "merges": "\u0120Le one", "count": 203, "type": "remove by frequency"} -{"id": 45632, "token": "\u0120antisense", "merges": "\u0120antis ense", "count": 204, "type": "remove by frequency"} -{"id": 48444, "token": "\u0120Ramirez", "merges": "\u0120Ram irez", "count": 204, "type": "remove by frequency"} -{"id": 18143, "token": "\u0120Utah", "merges": "\u0120U tah", "count": 204, "type": "remove by frequency"} -{"id": 38897, "token": "\u00e0\u00a9", "merges": "\u00e0 \u00a9", "count": 204, "type": "remove by frequency"} -{"id": 49478, "token": "\u0120dexamethasone", "merges": "\u0120dex amethasone", "count": 205, "type": "remove by frequency"} -{"id": 28838, "token": "toxins", "merges": "tox ins", "count": 205, "type": "remove by frequency"} -{"id": 28042, "token": "rVert", "merges": "r Vert", "count": 205, "type": "remove by frequency"} -{"id": 20883, "token": "**).", "merges": "** ).", "count": 205, "type": "remove by frequency"} -{"id": 49773, "token": "\u00d9\u012d", "merges": "\u00d9 \u012d", "count": 205, "type": "remove by frequency"} -{"id": 46992, "token": "\u00ef\u00ac", "merges": "\u00ef \u00ac", "count": 205, "type": "remove by frequency"} -{"id": 39722, "token": "\u0120Constitutional", "merges": "\u0120Constitution al", "count": 206, "type": "remove by frequency"} -{"id": 47974, "token": "\u0120Augustine", "merges": "\u0120August ine", "count": 206, "type": "remove by frequency"} -{"id": 30647, "token": "\u0120Nashville", "merges": "\u0120Nash ville", "count": 206, "type": "remove by frequency"} -{"id": 44039, "token": "\u0120Pediatric", "merges": "\u0120Ped iatric", "count": 206, "type": "remove by frequency"} -{"id": 39790, "token": "\u0120Verizon", "merges": "\u0120Ver izon", "count": 206, "type": "remove by frequency"} -{"id": 32309, "token": "\u0120\u00cf\u0122\u00ce\u00bf\u00cf\u0127", "merges": "\u0120\u00cf\u0122 \u00ce\u00bf\u00cf\u0127", "count": 206, "type": "remove by frequency"} -{"id": 31208, "token": "\u0120Alban", "merges": "\u0120Al ban", "count": 206, "type": "remove by frequency"} -{"id": 48511, "token": "\u0120Theo", "merges": "\u0120The o", "count": 206, "type": "remove by frequency"} -{"id": 31217, "token": "\u0120n\u00c3\u00a5", "merges": "\u0120n \u00c3\u00a5", "count": 206, "type": "remove by frequency"} -{"id": 50380, "token": "\u00e2\u013a\u0140", "merges": "\u00e2\u013a \u0140", "count": 206, "type": "remove by frequency"} -{"id": 35916, "token": "AndroidRuntime", "merges": "Android Runtime", "count": 207, "type": "remove by frequency"} -{"id": 38143, "token": "\u0120aeruginosa", "merges": "\u0120aer uginosa", "count": 207, "type": "remove by frequency"} -{"id": 27314, "token": "\u0120Capitol", "merges": "\u0120Cap itol", "count": 207, "type": "remove by frequency"} -{"id": 24296, "token": "\u0120javax", "merges": "\u0120jav ax", "count": 207, "type": "remove by frequency"} -{"id": 43872, "token": "\u0120intraoperative", "merges": "\u0120intra operative", "count": 208, "type": "remove by frequency"} -{"id": 43631, "token": "\u0120hemodynamic", "merges": "\u0120hem odynamic", "count": 208, "type": "remove by frequency"} -{"id": 48967, "token": "\u0120neoplasms", "merges": "\u0120neopl asms", "count": 208, "type": "remove by frequency"} -{"id": 47677, "token": "\u0120Bedford", "merges": "\u0120Bed ford", "count": 208, "type": "remove by frequency"} -{"id": 50320, "token": "\u00e2\u012a\u0142", "merges": "\u00e2\u012a \u0142", "count": 208, "type": "remove by frequency"} -{"id": 13181, "token": "\u0120\\#", "merges": "\u0120\\ #", "count": 208, "type": "remove by frequency"} -{"id": 38029, "token": "\u0120touchdowns", "merges": "\u0120touchdown s", "count": 209, "type": "remove by frequency"} -{"id": 36541, "token": "\u0120agonists", "merges": "\u0120agon ists", "count": 209, "type": "remove by frequency"} -{"id": 34992, "token": "\u0120Dubai", "merges": "\u0120Dub ai", "count": 209, "type": "remove by frequency"} -{"id": 49426, "token": "\u0120Dover", "merges": "\u0120D over", "count": 209, "type": "remove by frequency"} -{"id": 44846, "token": "\u0120Elena", "merges": "\u0120El ena", "count": 209, "type": "remove by frequency"} -{"id": 49000, "token": "\u0120Oste", "merges": "\u0120O ste", "count": 209, "type": "remove by frequency"} -{"id": 29789, "token": "$\u00e2\u0122\u013b", "merges": "$ \u00e2\u0122\u013b", "count": 209, "type": "remove by frequency"} -{"id": 15263, "token": "\u0120Alabama", "merges": "\u0120Al abama", "count": 210, "type": "remove by frequency"} -{"id": 37870, "token": "\u0120Durham", "merges": "\u0120Dur ham", "count": 210, "type": "remove by frequency"} -{"id": 36779, "token": "\u0120Sharon", "merges": "\u0120Sh aron", "count": 210, "type": "remove by frequency"} -{"id": 25625, "token": "\u0120UNITED", "merges": "\u0120UN ITED", "count": 210, "type": "remove by frequency"} -{"id": 46516, "token": "\u0120Dios", "merges": "\u0120D ios", "count": 210, "type": "remove by frequency"} -{"id": 41028, "token": "\u0120Balk", "merges": "\u0120B alk", "count": 210, "type": "remove by frequency"} -{"id": 36397, "token": "\u0120(\u00c2\u00a3", "merges": "\u0120( \u00c2\u00a3", "count": 210, "type": "remove by frequency"} -{"id": 21370, "token": "$-$", "merges": "$- $", "count": 210, "type": "remove by frequency"} -{"id": 47957, "token": "\u0120clonal", "merges": "\u0120cl onal", "count": 211, "type": "remove by frequency"} -{"id": 47138, "token": "\u0120Scotia", "merges": "\u0120Scot ia", "count": 211, "type": "remove by frequency"} -{"id": 44065, "token": "\u0120Mickey", "merges": "\u0120Mic key", "count": 211, "type": "remove by frequency"} -{"id": 30175, "token": "\u0120\u00ce\u00b3\u00ce\u00b9\u00ce\u00b1", "merges": "\u0120\u00ce\u00b3 \u00ce\u00b9\u00ce\u00b1", "count": 211, "type": "remove by frequency"} -{"id": 47873, "token": "\u0120Tyson", "merges": "\u0120T yson", "count": 211, "type": "remove by frequency"} -{"id": 37029, "token": "\u0120Presidential", "merges": "\u0120President ial", "count": 212, "type": "remove by frequency"} -{"id": 46718, "token": "\u0120massless", "merges": "\u0120mass less", "count": 212, "type": "remove by frequency"} -{"id": 42753, "token": "\u0120Dante", "merges": "\u0120D ante", "count": 212, "type": "remove by frequency"} -{"id": 24605, "token": "\u0120Cuba", "merges": "\u0120Cub a", "count": 212, "type": "remove by frequency"} -{"id": 41234, "token": "\u0120genotyping", "merges": "\u0120gen otyping", "count": 213, "type": "remove by frequency"} -{"id": 29912, "token": "\u0120Bristol", "merges": "\u0120B ristol", "count": 213, "type": "remove by frequency"} -{"id": 32845, "token": ")}=", "merges": ")} =", "count": 213, "type": "remove by frequency"} -{"id": 33026, "token": "^-$", "merges": "^- $", "count": 213, "type": "remove by frequency"} -{"id": 49054, "token": "\u0120copolymers", "merges": "\u0120cop olymers", "count": 214, "type": "remove by frequency"} -{"id": 17111, "token": "\u0120Ukraine", "merges": "\u0120Uk raine", "count": 214, "type": "remove by frequency"} -{"id": 45576, "token": "carbonyl", "merges": "carbon yl", "count": 214, "type": "remove by frequency"} -{"id": 46972, "token": "\u0120Gloria", "merges": "\u0120Glor ia", "count": 214, "type": "remove by frequency"} -{"id": 46440, "token": "\u0120\u00ce\u00b1\u00cf\u0122\u00ce\u00bf", "merges": "\u0120\u00ce\u00b1 \u00cf\u0122\u00ce\u00bf", "count": 214, "type": "remove by frequency"} -{"id": 49531, "token": "\u0120Truman", "merges": "\u0120Tr uman", "count": 214, "type": "remove by frequency"} -{"id": 38444, "token": "\u00cf\u0125\u00ce\u00b7\u00cf\u0124", "merges": "\u00cf\u0125\u00ce\u00b7 \u00cf\u0124", "count": 214, "type": "remove by frequency"} -{"id": 10967, "token": "\u0120\u00c2\u0142\u00c2\u0142", "merges": "\u0120 \u00c2\u0142\u00c2\u0142", "count": 214, "type": "remove by frequency"} -{"id": 30148, "token": "$_{\\", "merges": "$ _{\\", "count": 214, "type": "remove by frequency"} -{"id": 45490, "token": "\u0120Jed", "merges": "\u0120J ed", "count": 214, "type": "remove by frequency"} -{"id": 47073, "token": "\u0120degenerative", "merges": "\u0120degener ative", "count": 215, "type": "remove by frequency"} -{"id": 32436, "token": "\u0120Napoleon", "merges": "\u0120Napole on", "count": 215, "type": "remove by frequency"} -{"id": 30656, "token": "\u00cf\u0126\u00ce\u00b1\u00ce\u00b9", "merges": "\u00cf\u0126 \u00ce\u00b1\u00ce\u00b9", "count": 215, "type": "remove by frequency"} -{"id": 48337, "token": "\u0120G\u00c3\u00b6", "merges": "\u0120G \u00c3\u00b6", "count": 215, "type": "remove by frequency"} -{"id": 22969, "token": "=\\{", "merges": "=\\ {", "count": 215, "type": "remove by frequency"} -{"id": 36500, "token": "\u0120pneumoniae", "merges": "\u0120pneumonia e", "count": 216, "type": "remove by frequency"} -{"id": 42894, "token": "\u0120Sheffield", "merges": "\u0120She ffield", "count": 216, "type": "remove by frequency"} -{"id": 31109, "token": "\u0120motility", "merges": "\u0120mot ility", "count": 216, "type": "remove by frequency"} -{"id": 14553, "token": "$).", "merges": "$ ).", "count": 216, "type": "remove by frequency"} -{"id": 42264, "token": "nonatomic", "merges": "non atomic", "count": 217, "type": "remove by frequency"} -{"id": 36532, "token": "\u0120Randy", "merges": "\u0120R andy", "count": 217, "type": "remove by frequency"} -{"id": 36177, "token": "\u0120fr\u00c3\u00a5n", "merges": "\u0120fr\u00c3\u00a5 n", "count": 217, "type": "remove by frequency"} -{"id": 42735, "token": "\u0120Mick", "merges": "\u0120M ick", "count": 217, "type": "remove by frequency"} -{"id": 46977, "token": "\u0120\u00d7\u0137", "merges": "\u0120\u00d7 \u0137", "count": 217, "type": "remove by frequency"} -{"id": 29721, "token": "\u0120homosexual", "merges": "\u0120hom osexual", "count": 218, "type": "remove by frequency"} -{"id": 38476, "token": "\u0120Stockholm", "merges": "\u0120Stock holm", "count": 218, "type": "remove by frequency"} -{"id": 42110, "token": "\u0120epitopes", "merges": "\u0120epit opes", "count": 218, "type": "remove by frequency"} -{"id": 43525, "token": "\u0120hypergly", "merges": "\u0120hyper gly", "count": 218, "type": "remove by frequency"} -{"id": 41678, "token": "\u0120Rodgers", "merges": "\u0120Rod gers", "count": 218, "type": "remove by frequency"} -{"id": 41624, "token": "\u0120Luckily", "merges": "\u0120Luck ily", "count": 218, "type": "remove by frequency"} -{"id": 47572, "token": "\u0120Dix", "merges": "\u0120D ix", "count": 218, "type": "remove by frequency"} -{"id": 50344, "token": "\u00e2\u0136\u00a3", "merges": "\u00e2\u0136 \u00a3", "count": 218, "type": "remove by frequency"} -{"id": 20868, "token": "%\"}", "merges": "% \"}", "count": 218, "type": "remove by frequency"} -{"id": 37689, "token": "\u0120breastfeeding", "merges": "\u0120breast feeding", "count": 219, "type": "remove by frequency"} -{"id": 46143, "token": "\u0120europ\u00c3\u00a9enne", "merges": "\u0120europ\u00c3\u00a9 enne", "count": 219, "type": "remove by frequency"} -{"id": 36796, "token": "\u0120\u00cf\u0126\u00ce\u00b1", "merges": "\u0120\u00cf\u0126 \u00ce\u00b1", "count": 219, "type": "remove by frequency"} -{"id": 42860, "token": "\u0120AKI", "merges": "\u0120AK I", "count": 219, "type": "remove by frequency"} -{"id": 18002, "token": "bigr", "merges": "big r", "count": 219, "type": "remove by frequency"} -{"id": 54269, "token": "\u00e9\u0138\u0134", "merges": "\u00e9\u0138 \u0134", "count": 219, "type": "remove by frequency"} -{"id": 41915, "token": "}&\\", "merges": "} &\\", "count": 219, "type": "remove by frequency"} -{"id": 50670, "token": "\u00e5\u0125\u00b1", "merges": "\u00e5\u0125 \u00b1", "count": 219, "type": "remove by frequency"} -{"id": 49419, "token": "\u0120Heidegger", "merges": "\u0120He idegger", "count": 220, "type": "remove by frequency"} -{"id": 16301, "token": "\u0120Missouri", "merges": "\u0120Miss ouri", "count": 220, "type": "remove by frequency"} -{"id": 17297, "token": "\u0120cytotox", "merges": "\u0120cyt otox", "count": 220, "type": "remove by frequency"} -{"id": 36227, "token": "\u0120kunn", "merges": "\u0120k unn", "count": 220, "type": "remove by frequency"} -{"id": 9256, "token": "\u0120Iraq", "merges": "\u0120Ira q", "count": 220, "type": "remove by frequency"} -{"id": 49179, "token": "\u0120methylated", "merges": "\u0120methyl ated", "count": 221, "type": "remove by frequency"} -{"id": 43297, "token": "\u0120presidente", "merges": "\u0120president e", "count": 221, "type": "remove by frequency"} -{"id": 29629, "token": "\u0120Rangers", "merges": "\u0120R angers", "count": 221, "type": "remove by frequency"} -{"id": 47060, "token": "\u0120hanno", "merges": "\u0120h anno", "count": 221, "type": "remove by frequency"} -{"id": 42009, "token": "\u0120Saul", "merges": "\u0120Sa ul", "count": 221, "type": "remove by frequency"} -{"id": 23591, "token": "\u0120IGF", "merges": "\u0120I GF", "count": 221, "type": "remove by frequency"} -{"id": 49035, "token": "\u0120\u00c3\u013c", "merges": "\u0120\u00c3 \u013c", "count": 221, "type": "remove by frequency"} -{"id": 13941, "token": ")\\,", "merges": ")\\ ,", "count": 221, "type": "remove by frequency"} -{"id": 22119, "token": "\u0120Pittsburgh", "merges": "\u0120Pitts burgh", "count": 222, "type": "remove by frequency"} -{"id": 35244, "token": "\u0120vascul", "merges": "\u0120vas cul", "count": 222, "type": "remove by frequency"} -{"id": 36604, "token": "\u0120Brett", "merges": "\u0120B rett", "count": 222, "type": "remove by frequency"} -{"id": 36364, "token": "\u0120Alger", "merges": "\u0120Al ger", "count": 222, "type": "remove by frequency"} -{"id": 49648, "token": "\u0120Huh", "merges": "\u0120H uh", "count": 222, "type": "remove by frequency"} -{"id": 38082, "token": "\u0120BMD", "merges": "\u0120B MD", "count": 222, "type": "remove by frequency"} -{"id": 28832, "token": "\u00cf\u0123\u00cf\u012e", "merges": "\u00cf\u0123 \u00cf\u012e", "count": 222, "type": "remove by frequency"} -{"id": 48704, "token": "\u0120Proteins", "merges": "\u0120Prote ins", "count": 223, "type": "remove by frequency"} -{"id": 21249, "token": "\u0120Brooklyn", "merges": "\u0120Brook lyn", "count": 223, "type": "remove by frequency"} -{"id": 38196, "token": "\u0120pa\u00c3\u0143ses", "merges": "\u0120pa \u00c3\u0143ses", "count": 223, "type": "remove by frequency"} -{"id": 41017, "token": "\u0120Quite", "merges": "\u0120Qu ite", "count": 223, "type": "remove by frequency"} -{"id": 37138, "token": "\u0120Pruss", "merges": "\u0120Pr uss", "count": 223, "type": "remove by frequency"} -{"id": 20242, "token": "\u0120Christianity", "merges": "\u0120Christian ity", "count": 224, "type": "remove by frequency"} -{"id": 41073, "token": "\u0120Billboard", "merges": "\u0120Bill board", "count": 224, "type": "remove by frequency"} -{"id": 19918, "token": "\u0120cytokine", "merges": "\u0120cytok ine", "count": 224, "type": "remove by frequency"} -{"id": 47829, "token": "\u0120Adri", "merges": "\u0120Ad ri", "count": 224, "type": "remove by frequency"} -{"id": 21284, "token": "FPar", "merges": "F Par", "count": 224, "type": "remove by frequency"} -{"id": 49635, "token": "\u0120p\u00c3\u00a4", "merges": "\u0120p \u00c3\u00a4", "count": 224, "type": "remove by frequency"} -{"id": 45300, "token": "\u0120neonates", "merges": "\u0120neon ates", "count": 225, "type": "remove by frequency"} -{"id": 34981, "token": "\u0120Forty", "merges": "\u0120Fort y", "count": 225, "type": "remove by frequency"} -{"id": 34592, "token": "\u0120Petersburg", "merges": "\u0120Peters burg", "count": 226, "type": "remove by frequency"} -{"id": 24713, "token": ")\u00e2\u0122\u0135", "merges": ") \u00e2\u0122\u0135", "count": 226, "type": "remove by frequency"} -{"id": 38377, "token": "\u00ce\u00b8\u00ce\u00b5", "merges": "\u00ce\u00b8 \u00ce\u00b5", "count": 226, "type": "remove by frequency"} -{"id": 47546, "token": "**^", "merges": "** ^", "count": 226, "type": "remove by frequency"} -{"id": 35497, "token": "\u0120OTHERWISE", "merges": "\u0120OTHER WISE", "count": 227, "type": "remove by frequency"} -{"id": 46667, "token": "\u0120preseason", "merges": "\u0120pre season", "count": 227, "type": "remove by frequency"} -{"id": 48270, "token": "\u0120Doctors", "merges": "\u0120Do ctors", "count": 227, "type": "remove by frequency"} -{"id": 46063, "token": "\u0120AMERICA", "merges": "\u0120AMER ICA", "count": 227, "type": "remove by frequency"} -{"id": 42835, "token": "\u0120Windsor", "merges": "\u0120Wind sor", "count": 227, "type": "remove by frequency"} -{"id": 37500, "token": "\u0120\u00e0\u00a4\u00ae", "merges": "\u0120\u00e0\u00a4 \u00ae", "count": 227, "type": "remove by frequency"} -{"id": 39613, "token": "\u0120monolayer", "merges": "\u0120monol ayer", "count": 228, "type": "remove by frequency"} -{"id": 43263, "token": "\u0120amygdala", "merges": "\u0120amyg dala", "count": 228, "type": "remove by frequency"} -{"id": 30031, "token": "ViewById", "merges": "View ById", "count": 228, "type": "remove by frequency"} -{"id": 38076, "token": "\u0120Belf", "merges": "\u0120B elf", "count": 228, "type": "remove by frequency"} -{"id": 38033, "token": "\u00d9\u0126\u00d9\u012b", "merges": "\u00d9\u0126 \u00d9\u012b", "count": 228, "type": "remove by frequency"} -{"id": 48433, "token": "\u00e2\u0122\u0133", "merges": "\u00e2\u0122 \u0133", "count": 228, "type": "remove by frequency"} -{"id": 34473, "token": "\u0120Hurricane", "merges": "\u0120Hur ricane", "count": 229, "type": "remove by frequency"} -{"id": 33386, "token": "\u0120Arsenal", "merges": "\u0120Ar senal", "count": 229, "type": "remove by frequency"} -{"id": 44930, "token": "\u0120nanost", "merges": "\u0120nan ost", "count": 229, "type": "remove by frequency"} -{"id": 47066, "token": "\u0120Sussex", "merges": "\u0120Sus sex", "count": 229, "type": "remove by frequency"} -{"id": 8752, "token": "widehat", "merges": "wide hat", "count": 229, "type": "remove by frequency"} -{"id": 48547, "token": "\u0120zich", "merges": "\u0120z ich", "count": 229, "type": "remove by frequency"} -{"id": 43202, "token": "\u0120Myc", "merges": "\u0120My c", "count": 229, "type": "remove by frequency"} -{"id": 34641, "token": "\u0120Chancellor", "merges": "\u0120Ch ancellor", "count": 230, "type": "remove by frequency"} -{"id": 40169, "token": "\u0120ORDERED", "merges": "\u0120ORDER ED", "count": 230, "type": "remove by frequency"} -{"id": 28299, "token": "\u0120Buffalo", "merges": "\u0120Buff alo", "count": 230, "type": "remove by frequency"} -{"id": 27238, "token": "\u0120Eddie", "merges": "\u0120Edd ie", "count": 230, "type": "remove by frequency"} -{"id": 49690, "token": "\u0120ihn", "merges": "\u0120i hn", "count": 230, "type": "remove by frequency"} -{"id": 41783, "token": "\u00cf\u0125\u00ce\u00b5", "merges": "\u00cf\u0125 \u00ce\u00b5", "count": 230, "type": "remove by frequency"} -{"id": 1184, "token": "]\\]", "merges": "] \\]", "count": 230, "type": "remove by frequency"} -{"id": 29471, "token": "\u0120Fortunately", "merges": "\u0120F ortunately", "count": 231, "type": "remove by frequency"} -{"id": 48851, "token": "Fortunately", "merges": "F ortunately", "count": 231, "type": "remove by frequency"} -{"id": 20383, "token": "\u0120PARTICULAR", "merges": "\u0120PARTIC ULAR", "count": 231, "type": "remove by frequency"} -{"id": 49445, "token": "\u0120Abdullah", "merges": "\u0120Abd ullah", "count": 231, "type": "remove by frequency"} -{"id": 49028, "token": "\u0120Donovan", "merges": "\u0120Don ovan", "count": 231, "type": "remove by frequency"} -{"id": 47281, "token": "\u0120Fasc", "merges": "\u0120F asc", "count": 231, "type": "remove by frequency"} -{"id": 49099, "token": "\u00c3\u0143v", "merges": "\u00c3\u0143 v", "count": 231, "type": "remove by frequency"} -{"id": 47706, "token": "\u0120Kingston", "merges": "\u0120King ston", "count": 232, "type": "remove by frequency"} -{"id": 18231, "token": "ellees", "merges": "elle es", "count": 232, "type": "remove by frequency"} -{"id": 44518, "token": "\u0120Amend", "merges": "\u0120Am end", "count": 232, "type": "remove by frequency"} -{"id": 15600, "token": "\u0120Christians", "merges": "\u0120Christ ians", "count": 233, "type": "remove by frequency"} -{"id": 36090, "token": "\u0120Finnish", "merges": "\u0120Finn ish", "count": 233, "type": "remove by frequency"} -{"id": 42191, "token": "\u0120\u00cf\u0125\u00cf\u0126\u00ce\u00bf", "merges": "\u0120\u00cf\u0125 \u00cf\u0126\u00ce\u00bf", "count": 233, "type": "remove by frequency"} -{"id": 40868, "token": "\u0120Pompe", "merges": "\u0120Pom pe", "count": 233, "type": "remove by frequency"} -{"id": 3891, "token": "\u0120(\\[", "merges": "\u0120( \\[", "count": 233, "type": "remove by frequency"} -{"id": 36204, "token": "\u00ce\u00b9\u00ce\u00bf", "merges": "\u00ce\u00b9 \u00ce\u00bf", "count": 233, "type": "remove by frequency"} -{"id": 44829, "token": "\u0120\\%", "merges": "\u0120\\ %", "count": 233, "type": "remove by frequency"} -{"id": 42388, "token": "\u0120progenitors", "merges": "\u0120progen itors", "count": 234, "type": "remove by frequency"} -{"id": 47309, "token": "\u0120bilayer", "merges": "\u0120bil ayer", "count": 234, "type": "remove by frequency"} -{"id": 25579, "token": "\u0120Chelsea", "merges": "\u0120Chel sea", "count": 234, "type": "remove by frequency"} -{"id": 47877, "token": "\u0120Stanton", "merges": "\u0120St anton", "count": 234, "type": "remove by frequency"} -{"id": 30008, "token": "\u0120tr\u00c3\u00a8s", "merges": "\u0120tr \u00c3\u00a8s", "count": 234, "type": "remove by frequency"} -{"id": 42580, "token": "\u00e2\u0122\u0141", "merges": "\u00e2\u0122 \u0141", "count": 234, "type": "remove by frequency"} -{"id": 24419, "token": "\u0120Protest", "merges": "\u0120Pro test", "count": 235, "type": "remove by frequency"} -{"id": 39897, "token": "\u0120parench", "merges": "\u0120pa rench", "count": 235, "type": "remove by frequency"} -{"id": 8798, "token": "amsmath", "merges": "ams math", "count": 235, "type": "remove by frequency"} -{"id": 45393, "token": "\u0120Dillon", "merges": "\u0120D illon", "count": 235, "type": "remove by frequency"} -{"id": 42006, "token": "\u0120Paula", "merges": "\u0120Pa ula", "count": 235, "type": "remove by frequency"} -{"id": 42319, "token": "\u0120Clare", "merges": "\u0120Cl are", "count": 235, "type": "remove by frequency"} -{"id": 24501, "token": "\u00c3\u0143an", "merges": "\u00c3\u0143 an", "count": 235, "type": "remove by frequency"} -{"id": 33627, "token": "\u00cf\u0126\u00ce\u00ac", "merges": "\u00cf\u0126 \u00ce\u00ac", "count": 235, "type": "remove by frequency"} -{"id": 53008, "token": "\u00e7\u013e\u0140", "merges": "\u00e7\u013e \u0140", "count": 235, "type": "remove by frequency"} -{"id": 38360, "token": "\u0120anticancer", "merges": "\u0120antican cer", "count": 236, "type": "remove by frequency"} -{"id": 26469, "token": "\u0120Township", "merges": "\u0120Town ship", "count": 236, "type": "remove by frequency"} -{"id": 49066, "token": "\u0120afferent", "merges": "\u0120af ferent", "count": 236, "type": "remove by frequency"} -{"id": 37714, "token": "\u0120antican", "merges": "\u0120ant ican", "count": 236, "type": "remove by frequency"} -{"id": 30447, "token": "\u0120kinases", "merges": "\u0120kin ases", "count": 236, "type": "remove by frequency"} -{"id": 46644, "token": "\u0120Flores", "merges": "\u0120Fl ores", "count": 236, "type": "remove by frequency"} -{"id": 33811, "token": "\u0120Amanda", "merges": "\u0120Am anda", "count": 236, "type": "remove by frequency"} -{"id": 35307, "token": "\u0120Gavin", "merges": "\u0120G avin", "count": 236, "type": "remove by frequency"} -{"id": 31143, "token": "\u0120Lions", "merges": "\u0120L ions", "count": 236, "type": "remove by frequency"} -{"id": 14890, "token": "\u0120Iowa", "merges": "\u0120I owa", "count": 236, "type": "remove by frequency"} -{"id": 41436, "token": "\u0120Wir", "merges": "\u0120W ir", "count": 236, "type": "remove by frequency"} -{"id": 48564, "token": "\u0120GPs", "merges": "\u0120G Ps", "count": 236, "type": "remove by frequency"} -{"id": 50375, "token": "\u00e2\u0139\u0137", "merges": "\u00e2\u0139 \u0137", "count": 236, "type": "remove by frequency"} -{"id": 50281, "token": "\u00ca\u00b0", "merges": "\u00ca \u00b0", "count": 236, "type": "remove by frequency"} -{"id": 27791, "token": "\u0120phenotypic", "merges": "\u0120phen otypic", "count": 237, "type": "remove by frequency"} -{"id": 30249, "token": "\u0120pathophys", "merges": "\u0120path ophys", "count": 237, "type": "remove by frequency"} -{"id": 30556, "token": "\u0120Halloween", "merges": "\u0120Hallow een", "count": 237, "type": "remove by frequency"} -{"id": 43889, "token": "\u0120Lafayette", "merges": "\u0120Laf ayette", "count": 237, "type": "remove by frequency"} -{"id": 19160, "token": "\u0120Appeal", "merges": "\u0120Appe al", "count": 237, "type": "remove by frequency"} -{"id": 44438, "token": "\u0120Refuge", "merges": "\u0120Ref uge", "count": 237, "type": "remove by frequency"} -{"id": 20734, "token": "\u0120Denver", "merges": "\u0120Den ver", "count": 237, "type": "remove by frequency"} -{"id": 9501, "token": "mathsf", "merges": "math sf", "count": 237, "type": "remove by frequency"} -{"id": 44248, "token": "\u0120Owens", "merges": "\u0120Ow ens", "count": 237, "type": "remove by frequency"} -{"id": 28526, "token": "lvert", "merges": "l vert", "count": 237, "type": "remove by frequency"} -{"id": 38132, "token": "):=", "merges": "): =", "count": 237, "type": "remove by frequency"} -{"id": 53253, "token": "\u00e7\u00b1\u0132", "merges": "\u00e7\u00b1 \u0132", "count": 237, "type": "remove by frequency"} -{"id": 36152, "token": "\u0120homozygous", "merges": "\u0120hom ozygous", "count": 238, "type": "remove by frequency"} -{"id": 35287, "token": "\u0120Southwest", "merges": "\u0120South west", "count": 238, "type": "remove by frequency"} -{"id": 32984, "token": "\u0120CIRCUIT", "merges": "\u0120CIR CUIT", "count": 238, "type": "remove by frequency"} -{"id": 27540, "token": "\u0120Montana", "merges": "\u0120Mont ana", "count": 238, "type": "remove by frequency"} -{"id": 41874, "token": "\u0120Somers", "merges": "\u0120S omers", "count": 238, "type": "remove by frequency"} -{"id": 18806, "token": "\u0120\u00ce\u00ba\u00ce\u00b1\u00ce\u00b9", "merges": "\u0120\u00ce\u00ba \u00ce\u00b1\u00ce\u00b9", "count": 238, "type": "remove by frequency"} -{"id": 15427, "token": "\u0120Oregon", "merges": "\u0120O regon", "count": 238, "type": "remove by frequency"} -{"id": 36546, "token": "\u0120Seoul", "merges": "\u0120Se oul", "count": 238, "type": "remove by frequency"} -{"id": 49524, "token": "\u0120Zag", "merges": "\u0120Z ag", "count": 238, "type": "remove by frequency"} -{"id": 41620, "token": "\u00cf\u0123\u00ce\u0143", "merges": "\u00cf\u0123 \u00ce\u0143", "count": 238, "type": "remove by frequency"} -{"id": 25657, "token": "\u00e0\u00ab", "merges": "\u00e0 \u00ab", "count": 238, "type": "remove by frequency"} -{"id": 21240, "token": "\u0120Chairman", "merges": "\u0120Chair man", "count": 239, "type": "remove by frequency"} -{"id": 37273, "token": "mspace", "merges": "ms pace", "count": 239, "type": "remove by frequency"} -{"id": 42044, "token": "\u00cf\u0122\u00ce\u00b1", "merges": "\u00cf\u0122 \u00ce\u00b1", "count": 239, "type": "remove by frequency"} -{"id": 50143, "token": "Unmarshal", "merges": "Un marshal", "count": 240, "type": "remove by frequency"} -{"id": 27411, "token": "\u0120Athens", "merges": "\u0120Athe ns", "count": 240, "type": "remove by frequency"} -{"id": 36312, "token": "\u0120Derek", "merges": "\u0120De rek", "count": 240, "type": "remove by frequency"} -{"id": 33629, "token": "\u00cf\u0125\u00ce\u00b5\u00ce\u00b9", "merges": "\u00cf\u0125 \u00ce\u00b5\u00ce\u00b9", "count": 240, "type": "remove by frequency"} -{"id": 34215, "token": "\u0120S\u00c3\u00a3o", "merges": "\u0120S \u00c3\u00a3o", "count": 240, "type": "remove by frequency"} -{"id": 45655, "token": "\u0120Tina", "merges": "\u0120T ina", "count": 240, "type": "remove by frequency"} -{"id": 52955, "token": "\u00e7\u013b\u0134", "merges": "\u00e7\u013b \u0134", "count": 240, "type": "remove by frequency"} -{"id": 25564, "token": "\u0120Caribbean", "merges": "\u0120Carib bean", "count": 241, "type": "remove by frequency"} -{"id": 39388, "token": "\u0120contral", "merges": "\u0120cont ral", "count": 241, "type": "remove by frequency"} -{"id": 38308, "token": "\u0120Hearing", "merges": "\u0120H earing", "count": 241, "type": "remove by frequency"} -{"id": 49684, "token": "\u0120Lymph", "merges": "\u0120L ymph", "count": 241, "type": "remove by frequency"} -{"id": 37204, "token": "\u0120Kris", "merges": "\u0120K ris", "count": 241, "type": "remove by frequency"} -{"id": 14614, "token": "\u0120Indiana", "merges": "\u0120Indian a", "count": 242, "type": "remove by frequency"} -{"id": 33572, "token": "\u0120Tigers", "merges": "\u0120Tig ers", "count": 242, "type": "remove by frequency"} -{"id": 44379, "token": "\u0120mucho", "merges": "\u0120much o", "count": 242, "type": "remove by frequency"} -{"id": 45090, "token": "\u0120Milky", "merges": "\u0120Mil ky", "count": 242, "type": "remove by frequency"} -{"id": 49098, "token": "\u0120Huntington", "merges": "\u0120Hunting ton", "count": 243, "type": "remove by frequency"} -{"id": 17165, "token": "\u0120Olympics", "merges": "\u0120Olymp ics", "count": 243, "type": "remove by frequency"} -{"id": 45572, "token": "\u0120Brennan", "merges": "\u0120Bren nan", "count": 243, "type": "remove by frequency"} -{"id": 24175, "token": "uparrow", "merges": "up arrow", "count": 244, "type": "remove by frequency"} -{"id": 47918, "token": "\u0120Dental", "merges": "\u0120D ental", "count": 244, "type": "remove by frequency"} -{"id": 38438, "token": "ORAND", "merges": "OR AND", "count": 244, "type": "remove by frequency"} -{"id": 29250, "token": "\u0120Laid", "merges": "\u0120La id", "count": 244, "type": "remove by frequency"} -{"id": 19377, "token": "springframework", "merges": "spring framework", "count": 245, "type": "remove by frequency"} -{"id": 44050, "token": "\u0120immunotherapy", "merges": "\u0120immun otherapy", "count": 245, "type": "remove by frequency"} -{"id": 28038, "token": "\u0120angiogenesis", "merges": "\u0120angi ogenesis", "count": 245, "type": "remove by frequency"} -{"id": 47831, "token": "\u0120Christina", "merges": "\u0120Christ ina", "count": 245, "type": "remove by frequency"} -{"id": 48956, "token": "olinergic", "merges": "olin ergic", "count": 245, "type": "remove by frequency"} -{"id": 41429, "token": "\u0120Findings", "merges": "\u0120Find ings", "count": 245, "type": "remove by frequency"} -{"id": 37241, "token": "\u0120k\u00c3\u00b6nnen", "merges": "\u0120k\u00c3\u00b6nn en", "count": 245, "type": "remove by frequency"} -{"id": 16869, "token": "\u0120Indians", "merges": "\u0120Ind ians", "count": 245, "type": "remove by frequency"} -{"id": 40592, "token": "\u0120Steele", "merges": "\u0120Ste ele", "count": 245, "type": "remove by frequency"} -{"id": 49665, "token": "\u0120Abrams", "merges": "\u0120Abr ams", "count": 245, "type": "remove by frequency"} -{"id": 44057, "token": "\u0120Essex", "merges": "\u0120Es sex", "count": 245, "type": "remove by frequency"} -{"id": 42898, "token": "\u0120ChIP", "merges": "\u0120Ch IP", "count": 245, "type": "remove by frequency"} -{"id": 52875, "token": "\u00e7\u0136\u00af", "merges": "\u00e7\u0136 \u00af", "count": 245, "type": "remove by frequency"} -{"id": 7294, "token": "}}^", "merges": "}} ^", "count": 245, "type": "remove by frequency"} -{"id": 13162, "token": "\u0120Consequently", "merges": "\u0120Con sequently", "count": 246, "type": "remove by frequency"} -{"id": 49905, "token": "\u0120Nass", "merges": "\u0120N ass", "count": 246, "type": "remove by frequency"} -{"id": 8633, "token": "}\\,", "merges": "}\\ ,", "count": 246, "type": "remove by frequency"} -{"id": 41813, "token": "\u0120Brunswick", "merges": "\u0120Brun swick", "count": 247, "type": "remove by frequency"} -{"id": 28975, "token": "textnormal", "merges": "text normal", "count": 247, "type": "remove by frequency"} -{"id": 43978, "token": "ciparum", "merges": "cip arum", "count": 247, "type": "remove by frequency"} -{"id": 42948, "token": "\u0120Karn", "merges": "\u0120K arn", "count": 247, "type": "remove by frequency"} -{"id": 21119, "token": "\u0120f\u00c3\u00b6", "merges": "\u0120f \u00c3\u00b6", "count": 247, "type": "remove by frequency"} -{"id": 48893, "token": "\u0120Determination", "merges": "\u0120Determ ination", "count": 248, "type": "remove by frequency"} -{"id": 37495, "token": "\u0120Conclusion", "merges": "\u0120Con clusion", "count": 248, "type": "remove by frequency"} -{"id": 23050, "token": "\u0120Nevada", "merges": "\u0120Nev ada", "count": 248, "type": "remove by frequency"} -{"id": 37839, "token": "\u0120Wrest", "merges": "\u0120W rest", "count": 248, "type": "remove by frequency"} -{"id": 47204, "token": "\u0120ellos", "merges": "\u0120ell os", "count": 248, "type": "remove by frequency"} -{"id": 47086, "token": "\u0120heme", "merges": "\u0120he me", "count": 248, "type": "remove by frequency"} -{"id": 33599, "token": "\u0120inoculated", "merges": "\u0120inoc ulated", "count": 249, "type": "remove by frequency"} -{"id": 43107, "token": "\u0120Rivera", "merges": "\u0120River a", "count": 249, "type": "remove by frequency"} -{"id": 39605, "token": "\u0120Leeds", "merges": "\u0120Le eds", "count": 249, "type": "remove by frequency"} -{"id": 48555, "token": "\u0120Kirby", "merges": "\u0120Kir by", "count": 249, "type": "remove by frequency"} -{"id": 27006, "token": "\u0120Allah", "merges": "\u0120All ah", "count": 249, "type": "remove by frequency"} -{"id": 44946, "token": "\u0120Pence", "merges": "\u0120P ence", "count": 249, "type": "remove by frequency"} -{"id": 35069, "token": "\u0120RAF", "merges": "\u0120R AF", "count": 249, "type": "remove by frequency"} -{"id": 42168, "token": "\u0120CMV", "merges": "\u0120CM V", "count": 249, "type": "remove by frequency"} -{"id": 50385, "token": "\u00e3\u0122\u0125", "merges": "\u00e3\u0122 \u0125", "count": 249, "type": "remove by frequency"} -{"id": 51355, "token": "\u00e5\u00a7\u012b", "merges": "\u00e5\u00a7 \u012b", "count": 249, "type": "remove by frequency"} -{"id": 26846, "token": "\u0120Birmingham", "merges": "\u0120B irmingham", "count": 250, "type": "remove by frequency"} -{"id": 49034, "token": "\u0120testis", "merges": "\u0120test is", "count": 250, "type": "remove by frequency"} -{"id": 46801, "token": "\u0120McCoy", "merges": "\u0120McC oy", "count": 250, "type": "remove by frequency"} -{"id": 49318, "token": "\u0120Moody", "merges": "\u0120Mood y", "count": 250, "type": "remove by frequency"} -{"id": 44499, "token": "\u0120Hegel", "merges": "\u0120He gel", "count": 250, "type": "remove by frequency"} -{"id": 33533, "token": "\u0120Negro", "merges": "\u0120Neg ro", "count": 250, "type": "remove by frequency"} -{"id": 33164, "token": "\u0120Bcl", "merges": "\u0120B cl", "count": 250, "type": "remove by frequency"} -{"id": 48263, "token": "\u0120harboring", "merges": "\u0120harbor ing", "count": 251, "type": "remove by frequency"} -{"id": 47944, "token": "\u0120Jorge", "merges": "\u0120J orge", "count": 251, "type": "remove by frequency"} -{"id": 38690, "token": "\u0120Emir", "merges": "\u0120Em ir", "count": 251, "type": "remove by frequency"} -{"id": 42602, "token": "\u0120Neal", "merges": "\u0120Ne al", "count": 251, "type": "remove by frequency"} -{"id": 48596, "token": "\u0120Wyn", "merges": "\u0120W yn", "count": 251, "type": "remove by frequency"} -{"id": 47841, "token": "\u0120Starbucks", "merges": "\u0120Star bucks", "count": 252, "type": "remove by frequency"} -{"id": 32918, "token": "\u0120microtub", "merges": "\u0120micro tub", "count": 252, "type": "remove by frequency"} -{"id": 39387, "token": "\u0120androgen", "merges": "\u0120and rogen", "count": 252, "type": "remove by frequency"} -{"id": 37228, "token": "\u0120Chester", "merges": "\u0120Che ster", "count": 252, "type": "remove by frequency"} -{"id": 31955, "token": "\u0120Geneva", "merges": "\u0120Gen eva", "count": 252, "type": "remove by frequency"} -{"id": 18785, "token": "\u0120TGF", "merges": "\u0120T GF", "count": 252, "type": "remove by frequency"} -{"id": 43447, "token": "\u0120Bav", "merges": "\u0120B av", "count": 252, "type": "remove by frequency"} -{"id": 46083, "token": "\u0120*);", "merges": "\u0120* );", "count": 252, "type": "remove by frequency"} -{"id": 52466, "token": "\u00e6\u00b0\u00be", "merges": "\u00e6\u00b0 \u00be", "count": 252, "type": "remove by frequency"} -{"id": 50285, "token": "\u00cb\u012d", "merges": "\u00cb \u012d", "count": 252, "type": "remove by frequency"} -{"id": 47040, "token": "\u0120Lucia", "merges": "\u0120Luc ia", "count": 253, "type": "remove by frequency"} -{"id": 43623, "token": "\u0120Paso", "merges": "\u0120Pas o", "count": 253, "type": "remove by frequency"} -{"id": 23641, "token": "\u0120LGBT", "merges": "\u0120L GBT", "count": 253, "type": "remove by frequency"} -{"id": 34299, "token": "\u00cf\u0123\u00ce\u00af", "merges": "\u00cf\u0123 \u00ce\u00af", "count": 253, "type": "remove by frequency"} -{"id": 39117, "token": "\u0120Thy", "merges": "\u0120Th y", "count": 253, "type": "remove by frequency"} -{"id": 50357, "token": "\u00e2\u0138\u0127", "merges": "\u00e2\u0138 \u0127", "count": 253, "type": "remove by frequency"} -{"id": 47622, "token": "\u0120reticulum", "merges": "\u0120reticul um", "count": 254, "type": "remove by frequency"} -{"id": 37783, "token": "\u0120Panama", "merges": "\u0120Pan ama", "count": 254, "type": "remove by frequency"} -{"id": 47442, "token": "\u0120Gert", "merges": "\u0120G ert", "count": 254, "type": "remove by frequency"} -{"id": 26794, "token": "\u0120Nucl", "merges": "\u0120N ucl", "count": 254, "type": "remove by frequency"} -{"id": 17840, "token": "^{*", "merges": "^{ *", "count": 254, "type": "remove by frequency"} -{"id": 32668, "token": "\u0120Investig", "merges": "\u0120Invest ig", "count": 255, "type": "remove by frequency"} -{"id": 41583, "token": "\u0120Seeing", "merges": "\u0120See ing", "count": 255, "type": "remove by frequency"} -{"id": 49134, "token": "\u0120Bronze", "merges": "\u0120Bron ze", "count": 255, "type": "remove by frequency"} -{"id": 49713, "token": "Saharan", "merges": "Sah aran", "count": 255, "type": "remove by frequency"} -{"id": 50061, "token": "\u0120Boeh", "merges": "\u0120Bo eh", "count": 255, "type": "remove by frequency"} -{"id": 44261, "token": "\u0120Cork", "merges": "\u0120C ork", "count": 255, "type": "remove by frequency"} -{"id": 4299, "token": "\u0120[**", "merges": "\u0120[ **", "count": 255, "type": "remove by frequency"} -{"id": 29670, "token": "operatively", "merges": "oper atively", "count": 256, "type": "remove by frequency"} -{"id": 43853, "token": "\u0120lymphoid", "merges": "\u0120lymph oid", "count": 256, "type": "remove by frequency"} -{"id": 27741, "token": "\u0120Florence", "merges": "\u0120Fl orence", "count": 256, "type": "remove by frequency"} -{"id": 31519, "token": "\u0120Bulgar", "merges": "\u0120Bul gar", "count": 256, "type": "remove by frequency"} -{"id": 25628, "token": "\u0120\u00cf\u0126\u00ce\u00b7\u00ce\u00bd", "merges": "\u0120\u00cf\u0126\u00ce\u00b7 \u00ce\u00bd", "count": 256, "type": "remove by frequency"} -{"id": 40076, "token": "\u0120larval", "merges": "\u0120lar val", "count": 256, "type": "remove by frequency"} -{"id": 26266, "token": "\u0120fr\u00c3\u00a5", "merges": "\u0120fr \u00c3\u00a5", "count": 256, "type": "remove by frequency"} -{"id": 43702, "token": "\u0120Lack", "merges": "\u0120L ack", "count": 256, "type": "remove by frequency"} -{"id": 22747, "token": "\u0120$<", "merges": "\u0120$ <", "count": 256, "type": "remove by frequency"} -{"id": 32821, "token": "\u00e2\u0122\u0135\u00e2\u0122\u0135\u00e2\u0122\u0135\u00e2\u0122\u0135\u00e2\u0122\u0135\u00e2\u0122\u0135\u00e2\u0122\u0135\u00e2\u0122\u0135", "merges": "\u00e2\u0122\u0135\u00e2\u0122\u0135\u00e2\u0122\u0135\u00e2\u0122\u0135 \u00e2\u0122\u0135\u00e2\u0122\u0135\u00e2\u0122\u0135\u00e2\u0122\u0135", "count": 257, "type": "remove by frequency"} -{"id": 29357, "token": "\u0120vesicles", "merges": "\u0120ves icles", "count": 257, "type": "remove by frequency"} -{"id": 27199, "token": "\u0120Broadway", "merges": "\u0120Broad way", "count": 257, "type": "remove by frequency"} -{"id": 39132, "token": "\u0120cleaved", "merges": "\u0120cle aved", "count": 257, "type": "remove by frequency"} -{"id": 49813, "token": "\u0120Kelley", "merges": "\u0120Kel ley", "count": 257, "type": "remove by frequency"} -{"id": 46985, "token": "\u0120Lans", "merges": "\u0120L ans", "count": 257, "type": "remove by frequency"} -{"id": 30949, "token": "\u0120MCF", "merges": "\u0120M CF", "count": 257, "type": "remove by frequency"} -{"id": 28827, "token": "\u0120COPYRIGHT", "merges": "\u0120COP YRIGHT", "count": 258, "type": "remove by frequency"} -{"id": 42928, "token": "\u0120Compton", "merges": "\u0120Com pton", "count": 258, "type": "remove by frequency"} -{"id": 49226, "token": "\u0120Husband", "merges": "\u0120Hus band", "count": 258, "type": "remove by frequency"} -{"id": 28518, "token": "\u0120Hannah", "merges": "\u0120H annah", "count": 258, "type": "remove by frequency"} -{"id": 18257, "token": ">();", "merges": "> ();", "count": 258, "type": "remove by frequency"} -{"id": 28876, "token": "\\|_", "merges": "\\| _", "count": 258, "type": "remove by frequency"} -{"id": 38311, "token": "\u0120asymptotically", "merges": "\u0120asympt otically", "count": 259, "type": "remove by frequency"} -{"id": 22808, "token": "\u0120Liverpool", "merges": "\u0120Liver pool", "count": 259, "type": "remove by frequency"} -{"id": 38007, "token": "\u0120Parish", "merges": "\u0120Par ish", "count": 259, "type": "remove by frequency"} -{"id": 41713, "token": "oxylin", "merges": "oxyl in", "count": 259, "type": "remove by frequency"} -{"id": 40212, "token": "\u0120Salad", "merges": "\u0120Sal ad", "count": 259, "type": "remove by frequency"} -{"id": 46130, "token": "rbrace", "merges": "r brace", "count": 259, "type": "remove by frequency"} -{"id": 35334, "token": "\u0120Chern", "merges": "\u0120C hern", "count": 259, "type": "remove by frequency"} -{"id": 50145, "token": "\u0120circumferential", "merges": "\u0120circum ferential", "count": 260, "type": "remove by frequency"} -{"id": 50098, "token": "\u0120cardiomyopathy", "merges": "\u0120cardiomy opathy", "count": 260, "type": "remove by frequency"} -{"id": 41871, "token": "\u0120Regulations", "merges": "\u0120Reg ulations", "count": 260, "type": "remove by frequency"} -{"id": 22437, "token": "\u0120Egyptian", "merges": "\u0120Egypt ian", "count": 260, "type": "remove by frequency"} -{"id": 28955, "token": "\u0120Napole", "merges": "\u0120Nap ole", "count": 260, "type": "remove by frequency"} -{"id": 46612, "token": "\u0120Lilly", "merges": "\u0120L illy", "count": 260, "type": "remove by frequency"} -{"id": 29322, "token": "\u0120Filed", "merges": "\u0120F iled", "count": 260, "type": "remove by frequency"} -{"id": 41833, "token": "\u0120Niss", "merges": "\u0120N iss", "count": 260, "type": "remove by frequency"} -{"id": 48614, "token": "\u00ce\u00ba\u00ce\u00b1", "merges": "\u00ce\u00ba \u00ce\u00b1", "count": 260, "type": "remove by frequency"} -{"id": 30527, "token": "\u0120kDa", "merges": "\u0120k Da", "count": 260, "type": "remove by frequency"} -{"id": 35098, "token": "\u0120BSA", "merges": "\u0120B SA", "count": 260, "type": "remove by frequency"} -{"id": 23719, "token": "\u0120ApJ", "merges": "\u0120Ap J", "count": 260, "type": "remove by frequency"} -{"id": 12136, "token": "\u00c2\u00be", "merges": "\u00c2 \u00be", "count": 260, "type": "remove by frequency"} -{"id": 42427, "token": "\u0120Revolutionary", "merges": "\u0120Revolution ary", "count": 261, "type": "remove by frequency"} -{"id": 29490, "token": "\u0120Northwest", "merges": "\u0120North west", "count": 261, "type": "remove by frequency"} -{"id": 33278, "token": "\u0120PROVIDED", "merges": "\u0120PROVID ED", "count": 261, "type": "remove by frequency"} -{"id": 28351, "token": "\u0120Former", "merges": "\u0120Form er", "count": 261, "type": "remove by frequency"} -{"id": 44851, "token": "{|\\", "merges": "{ |\\", "count": 261, "type": "remove by frequency"} -{"id": 45330, "token": "ETHERTYPE", "merges": "ETH ERTYPE", "count": 262, "type": "remove by frequency"} -{"id": 38343, "token": "\u0120Hamburg", "merges": "\u0120Hamb urg", "count": 262, "type": "remove by frequency"} -{"id": 48077, "token": "\u0120Watkins", "merges": "\u0120Wat kins", "count": 262, "type": "remove by frequency"} -{"id": 35112, "token": "\u0120Pradesh", "merges": "\u0120Pr adesh", "count": 262, "type": "remove by frequency"} -{"id": 15391, "token": "\u0120Kansas", "merges": "\u0120K ansas", "count": 262, "type": "remove by frequency"} -{"id": 40208, "token": "\u0120Papa", "merges": "\u0120Pap a", "count": 262, "type": "remove by frequency"} -{"id": 22623, "token": "\u0120cytometry", "merges": "\u0120cyt ometry", "count": 263, "type": "remove by frequency"} -{"id": 49566, "token": "\u0120Disorders", "merges": "\u0120Dis orders", "count": 263, "type": "remove by frequency"} -{"id": 5577, "token": "mathfrak", "merges": "math frak", "count": 263, "type": "remove by frequency"} -{"id": 31328, "token": "\u0120Hockey", "merges": "\u0120H ockey", "count": 263, "type": "remove by frequency"} -{"id": 46283, "token": "\u0120Cardi", "merges": "\u0120Card i", "count": 263, "type": "remove by frequency"} -{"id": 41268, "token": "\u0120USC", "merges": "\u0120US C", "count": 263, "type": "remove by frequency"} -{"id": 33047, "token": "\u0120//!", "merges": "\u0120// !", "count": 263, "type": "remove by frequency"} -{"id": 28592, "token": "\u0120\u00c2\u00b5m", "merges": "\u0120\u00c2\u00b5 m", "count": 263, "type": "remove by frequency"} -{"id": 34867, "token": "\u0120(\u00c2\u00a7", "merges": "\u0120( \u00c2\u00a7", "count": 263, "type": "remove by frequency"} -{"id": 47022, "token": "\u00c5\u00a1\u00c3\u0143", "merges": "\u00c5\u00a1 \u00c3\u0143", "count": 263, "type": "remove by frequency"} -{"id": 27324, "token": "\u0120}$", "merges": "\u0120 }$", "count": 263, "type": "remove by frequency"} -{"id": 33638, "token": "\u0120IOException", "merges": "\u0120IO Exception", "count": 264, "type": "remove by frequency"} -{"id": 42819, "token": "\u0120Surgical", "merges": "\u0120S urgical", "count": 264, "type": "remove by frequency"} -{"id": 39351, "token": "\u0120Salvador", "merges": "\u0120Salv ador", "count": 264, "type": "remove by frequency"} -{"id": 49334, "token": "\u0120DAMAGE", "merges": "\u0120DAM AGE", "count": 264, "type": "remove by frequency"} -{"id": 44133, "token": "\u0120Luxem", "merges": "\u0120Lux em", "count": 264, "type": "remove by frequency"} -{"id": 41788, "token": "\u0120h\u00c3\u00a4", "merges": "\u0120h \u00c3\u00a4", "count": 264, "type": "remove by frequency"} -{"id": 50347, "token": "\u00e2\u0137\u0140", "merges": "\u00e2\u0137 \u0140", "count": 264, "type": "remove by frequency"} -{"id": 36250, "token": "\u0120Mohammed", "merges": "\u0120Moh ammed", "count": 265, "type": "remove by frequency"} -{"id": 33112, "token": "\u0120biofilm", "merges": "\u0120bio film", "count": 265, "type": "remove by frequency"} -{"id": 47799, "token": "\u0120Titans", "merges": "\u0120Tit ans", "count": 265, "type": "remove by frequency"} -{"id": 53780, "token": "\u00e8\u00ae\u013c", "merges": "\u00e8\u00ae \u013c", "count": 265, "type": "remove by frequency"} -{"id": 34328, "token": "\u0120Westminster", "merges": "\u0120West minster", "count": 266, "type": "remove by frequency"} -{"id": 33166, "token": "\u0120Infantry", "merges": "\u0120Inf antry", "count": 266, "type": "remove by frequency"} -{"id": 43648, "token": "\u0120Benefits", "merges": "\u0120Benef its", "count": 266, "type": "remove by frequency"} -{"id": 35750, "token": "\u0120cysteine", "merges": "\u0120cy steine", "count": 266, "type": "remove by frequency"} -{"id": 30140, "token": "\u0120Miranda", "merges": "\u0120Mir anda", "count": 266, "type": "remove by frequency"} -{"id": 47786, "token": "\u0120Tobacco", "merges": "\u0120Tob acco", "count": 266, "type": "remove by frequency"} -{"id": 8797, "token": "\u0120Muslim", "merges": "\u0120Mus lim", "count": 266, "type": "remove by frequency"} -{"id": 25556, "token": "\u0120Reagan", "merges": "\u0120Re agan", "count": 266, "type": "remove by frequency"} -{"id": 36353, "token": "\u0120Plato", "merges": "\u0120Pl ato", "count": 266, "type": "remove by frequency"} -{"id": 11563, "token": "\u0120Jews", "merges": "\u0120Jew s", "count": 266, "type": "remove by frequency"} -{"id": 27435, "token": "\u00ce\u00bf\u00cf\u012f", "merges": "\u00ce\u00bf \u00cf\u012f", "count": 266, "type": "remove by frequency"} -{"id": 27487, "token": "\u0120\u00c8\u013bi", "merges": "\u0120 \u00c8\u013bi", "count": 266, "type": "remove by frequency"} -{"id": 47149, "token": "\u0120patterning", "merges": "\u0120patter ning", "count": 267, "type": "remove by frequency"} -{"id": 46765, "token": "\u0120guerra", "merges": "\u0120gu erra", "count": 267, "type": "remove by frequency"} -{"id": 22366, "token": "\u0120Maine", "merges": "\u0120M aine", "count": 267, "type": "remove by frequency"} -{"id": 29508, "token": "\u0120onder", "merges": "\u0120on der", "count": 267, "type": "remove by frequency"} -{"id": 39964, "token": "\u0120waar", "merges": "\u0120wa ar", "count": 267, "type": "remove by frequency"} -{"id": 25492, "token": "\u0120^\\", "merges": "\u0120 ^\\", "count": 267, "type": "remove by frequency"} -{"id": 40523, "token": "\u0120immunoglobulin", "merges": "\u0120immunoglob ulin", "count": 268, "type": "remove by frequency"} -{"id": 34285, "token": "\u0120glutathione", "merges": "\u0120glut athione", "count": 268, "type": "remove by frequency"} -{"id": 30316, "token": "\u0120oligonucle", "merges": "\u0120olig onucle", "count": 268, "type": "remove by frequency"} -{"id": 27457, "token": "\u0120vaginal", "merges": "\u0120vag inal", "count": 268, "type": "remove by frequency"} -{"id": 42223, "token": "\u0120CRISPR", "merges": "\u0120CR ISPR", "count": 268, "type": "remove by frequency"} -{"id": 49942, "token": "\u0120nanoc", "merges": "\u0120nan oc", "count": 268, "type": "remove by frequency"} -{"id": 46394, "token": "\u0120myocardium", "merges": "\u0120myocard ium", "count": 269, "type": "remove by frequency"} -{"id": 47748, "token": "olymers", "merges": "ol ymers", "count": 269, "type": "remove by frequency"} -{"id": 47945, "token": "\u0120Aviv", "merges": "\u0120Av iv", "count": 269, "type": "remove by frequency"} -{"id": 29924, "token": "Bigl", "merges": "Big l", "count": 269, "type": "remove by frequency"} -{"id": 35819, "token": "\u0120CONTRIBUT", "merges": "\u0120CONTR IBUT", "count": 270, "type": "remove by frequency"} -{"id": 25771, "token": "\u0120Norwegian", "merges": "\u0120Nor wegian", "count": 270, "type": "remove by frequency"} -{"id": 37539, "token": "\u0120Gandhi", "merges": "\u0120Gand hi", "count": 270, "type": "remove by frequency"} -{"id": 37294, "token": "\u00c3\u0143ses", "merges": "\u00c3\u0143 ses", "count": 270, "type": "remove by frequency"} -{"id": 45109, "token": "ticos", "merges": "tic os", "count": 270, "type": "remove by frequency"} -{"id": 47324, "token": "\u00c3\u00aetre", "merges": "\u00c3\u00ae tre", "count": 270, "type": "remove by frequency"} -{"id": 46552, "token": "pmed", "merges": "p med", "count": 270, "type": "remove by frequency"} -{"id": 22899, "token": "\u0120HCV", "merges": "\u0120H CV", "count": 271, "type": "remove by frequency"} -{"id": 47007, "token": "')->", "merges": "') ->", "count": 271, "type": "remove by frequency"} -{"id": 51978, "token": "\u00e6\u012f\u00b2", "merges": "\u00e6\u012f \u00b2", "count": 271, "type": "remove by frequency"} -{"id": 34665, "token": "\u0120Interests", "merges": "\u0120Interest s", "count": 272, "type": "remove by frequency"} -{"id": 24604, "token": "\u0120Montreal", "merges": "\u0120Mont real", "count": 272, "type": "remove by frequency"} -{"id": 36072, "token": "\u0120Earlier", "merges": "\u0120E arlier", "count": 272, "type": "remove by frequency"} -{"id": 41253, "token": "\u0120dieser", "merges": "\u0120dies er", "count": 272, "type": "remove by frequency"} -{"id": 38900, "token": "\u0120Omar", "merges": "\u0120O mar", "count": 272, "type": "remove by frequency"} -{"id": 46847, "token": "\u0120Marl", "merges": "\u0120Mar l", "count": 272, "type": "remove by frequency"} -{"id": 54423, "token": "\u00e9\u0142\u00bc", "merges": "\u00e9\u0142 \u00bc", "count": 272, "type": "remove by frequency"} -{"id": 48843, "token": "\u0120eigenstates", "merges": "\u0120eigen states", "count": 273, "type": "remove by frequency"} -{"id": 49850, "token": "\u0120Provincial", "merges": "\u0120Prov incial", "count": 273, "type": "remove by frequency"} -{"id": 49267, "token": "\u0120parece", "merges": "\u0120pare ce", "count": 273, "type": "remove by frequency"} -{"id": 3363, "token": "}$.", "merges": "}$ .", "count": 273, "type": "remove by frequency"} -{"id": 15243, "token": "}}=", "merges": "}} =", "count": 273, "type": "remove by frequency"} -{"id": 33887, "token": "\u0120\u00cf\u0127", "merges": "\u0120 \u00cf\u0127", "count": 273, "type": "remove by frequency"} -{"id": 40368, "token": "\u0120mechanistic", "merges": "\u0120mechan istic", "count": 274, "type": "remove by frequency"} -{"id": 30719, "token": "\u0120macrophage", "merges": "\u0120macroph age", "count": 274, "type": "remove by frequency"} -{"id": 43318, "token": "\u0120metformin", "merges": "\u0120met formin", "count": 274, "type": "remove by frequency"} -{"id": 27611, "token": "\u0120\u00c3\u00a9tait", "merges": "\u0120\u00c3\u00a9 tait", "count": 274, "type": "remove by frequency"} -{"id": 30028, "token": "\u0120Serum", "merges": "\u0120Ser um", "count": 274, "type": "remove by frequency"} -{"id": 21103, "token": "tfrac", "merges": "t frac", "count": 274, "type": "remove by frequency"} -{"id": 50227, "token": "\u0120Cary", "merges": "\u0120C ary", "count": 274, "type": "remove by frequency"} -{"id": 26097, "token": "\u00ce\u00af\u00ce\u00bd", "merges": "\u00ce\u00af \u00ce\u00bd", "count": 274, "type": "remove by frequency"} -{"id": 45275, "token": "\u0120ZnO", "merges": "\u0120Zn O", "count": 274, "type": "remove by frequency"} -{"id": 31234, "token": "\u0120t\u00c3\u00a4", "merges": "\u0120t \u00c3\u00a4", "count": 274, "type": "remove by frequency"} -{"id": 54422, "token": "\u00e9\u0142\u00ab", "merges": "\u00e9\u0142 \u00ab", "count": 274, "type": "remove by frequency"} -{"id": 31379, "token": "}}[", "merges": "}} [", "count": 274, "type": "remove by frequency"} -{"id": 50289, "token": "\u00cb\u013c", "merges": "\u00cb \u013c", "count": 274, "type": "remove by frequency"} -{"id": 20760, "token": "emptyset", "merges": "empt yset", "count": 275, "type": "remove by frequency"} -{"id": 42376, "token": "\u0120Hussein", "merges": "\u0120Hus sein", "count": 275, "type": "remove by frequency"} -{"id": 47293, "token": "\u0120haemat", "merges": "\u0120ha emat", "count": 275, "type": "remove by frequency"} -{"id": 41382, "token": "\u0120axons", "merges": "\u0120ax ons", "count": 275, "type": "remove by frequency"} -{"id": 40794, "token": "\u0120Joey", "merges": "\u0120Jo ey", "count": 275, "type": "remove by frequency"} -{"id": 50249, "token": "\u0120Tus", "merges": "\u0120T us", "count": 275, "type": "remove by frequency"} -{"id": 28530, "token": "\u0120NHS", "merges": "\u0120N HS", "count": 275, "type": "remove by frequency"} -{"id": 22075, "token": "\u0120Philippines", "merges": "\u0120Philipp ines", "count": 276, "type": "remove by frequency"} -{"id": 42887, "token": "\u0120placental", "merges": "\u0120plac ental", "count": 276, "type": "remove by frequency"} -{"id": 40854, "token": "\u0120colitis", "merges": "\u0120col itis", "count": 276, "type": "remove by frequency"} -{"id": 42431, "token": "\u0120Epstein", "merges": "\u0120Ep stein", "count": 276, "type": "remove by frequency"} -{"id": 15222, "token": "\u0120Besides", "merges": "\u0120B esides", "count": 276, "type": "remove by frequency"} -{"id": 44567, "token": "\u0120Viking", "merges": "\u0120V iking", "count": 276, "type": "remove by frequency"} -{"id": 20829, "token": "\u0120Alaska", "merges": "\u0120Al aska", "count": 276, "type": "remove by frequency"} -{"id": 50400, "token": "\u00e3\u0122\u013f", "merges": "\u00e3\u0122 \u013f", "count": 276, "type": "remove by frequency"} -{"id": 45876, "token": "\u0120nanoparticle", "merges": "\u0120nanop article", "count": 277, "type": "remove by frequency"} -{"id": 36835, "token": "\u0120Hernandez", "merges": "\u0120Hern andez", "count": 277, "type": "remove by frequency"} -{"id": 28410, "token": "\u0120histone", "merges": "\u0120hist one", "count": 277, "type": "remove by frequency"} -{"id": 43785, "token": "\u0120Sandra", "merges": "\u0120Sand ra", "count": 277, "type": "remove by frequency"} -{"id": 26411, "token": "\u0120Pete", "merges": "\u0120Pet e", "count": 277, "type": "remove by frequency"} -{"id": 43037, "token": "n\u00c4\u013d", "merges": "n \u00c4\u013d", "count": 277, "type": "remove by frequency"} -{"id": 50282, "token": "\u00cb\u0129", "merges": "\u00cb \u0129", "count": 277, "type": "remove by frequency"} -{"id": 21673, "token": "\u0120differentially", "merges": "\u0120different ially", "count": 278, "type": "remove by frequency"} -{"id": 41144, "token": "\u0120Signaling", "merges": "\u0120Sign aling", "count": 278, "type": "remove by frequency"} -{"id": 45034, "token": "\u0120cationic", "merges": "\u0120cation ic", "count": 278, "type": "remove by frequency"} -{"id": 43949, "token": "\u0120Manila", "merges": "\u0120Man ila", "count": 278, "type": "remove by frequency"} -{"id": 49668, "token": "\u0120Plains", "merges": "\u0120Pl ains", "count": 278, "type": "remove by frequency"} -{"id": 27718, "token": "\u00cf\u0122\u00cf\u012e", "merges": "\u00cf\u0122 \u00cf\u012e", "count": 278, "type": "remove by frequency"} -{"id": 37471, "token": ">';", "merges": "> ';", "count": 278, "type": "remove by frequency"} -{"id": 37641, "token": "\u0120Jackie", "merges": "\u0120Jack ie", "count": 279, "type": "remove by frequency"} -{"id": 46831, "token": "\u0120Nolan", "merges": "\u0120N olan", "count": 279, "type": "remove by frequency"} -{"id": 37140, "token": "\u0120Assay", "merges": "\u0120Ass ay", "count": 279, "type": "remove by frequency"} -{"id": 40060, "token": "\u0120Acad", "merges": "\u0120Ac ad", "count": 279, "type": "remove by frequency"} -{"id": 26867, "token": "\u00cf\u0130\u00ce\u00bd", "merges": "\u00cf\u0130 \u00ce\u00bd", "count": 279, "type": "remove by frequency"} -{"id": 31858, "token": "\u0120\\}", "merges": "\u0120\\ }", "count": 279, "type": "remove by frequency"} -{"id": 17913, "token": "\u0120Jerusalem", "merges": "\u0120Jer usalem", "count": 280, "type": "remove by frequency"} -{"id": 46182, "token": "\u0120menstrual", "merges": "\u0120menstru al", "count": 280, "type": "remove by frequency"} -{"id": 34928, "token": "\u0120DISTRICT", "merges": "\u0120DIST RICT", "count": 280, "type": "remove by frequency"} -{"id": 36890, "token": "\u0120Bradford", "merges": "\u0120Brad ford", "count": 280, "type": "remove by frequency"} -{"id": 29040, "token": "\u0120Brandon", "merges": "\u0120Br andon", "count": 280, "type": "remove by frequency"} -{"id": 42442, "token": "\u0120Bundes", "merges": "\u0120Bund es", "count": 280, "type": "remove by frequency"} -{"id": 40434, "token": "\u0120Lords", "merges": "\u0120Lord s", "count": 280, "type": "remove by frequency"} -{"id": 41451, "token": "\u00ce\u00bd\u00ce\u00b7", "merges": "\u00ce\u00bd \u00ce\u00b7", "count": 280, "type": "remove by frequency"} -{"id": 33923, "token": ")|^", "merges": ")| ^", "count": 280, "type": "remove by frequency"} -{"id": 35229, "token": "\u0120interferon", "merges": "\u0120interfer on", "count": 281, "type": "remove by frequency"} -{"id": 34409, "token": "\u0120Malcolm", "merges": "\u0120Mal colm", "count": 281, "type": "remove by frequency"} -{"id": 44056, "token": "\u0120Lindsay", "merges": "\u0120Lind say", "count": 281, "type": "remove by frequency"} -{"id": 46571, "token": "\u0120entend", "merges": "\u0120ent end", "count": 281, "type": "remove by frequency"} -{"id": 47660, "token": "\u0120Calder", "merges": "\u0120Cal der", "count": 281, "type": "remove by frequency"} -{"id": 37921, "token": "\u0120Molly", "merges": "\u0120M olly", "count": 281, "type": "remove by frequency"} -{"id": 48304, "token": "\u0120parce", "merges": "\u0120par ce", "count": 281, "type": "remove by frequency"} -{"id": 44553, "token": "\u0120electrophys", "merges": "\u0120electroph ys", "count": 282, "type": "remove by frequency"} -{"id": 36449, "token": "\u0120Christine", "merges": "\u0120Christ ine", "count": 282, "type": "remove by frequency"} -{"id": 38535, "token": "DISCUSSION", "merges": "DIS CUSSION", "count": 282, "type": "remove by frequency"} -{"id": 24084, "token": "\u0120Portugal", "merges": "\u0120Portug al", "count": 282, "type": "remove by frequency"} -{"id": 20901, "token": "\u0120Compared", "merges": "\u0120Comp ared", "count": 282, "type": "remove by frequency"} -{"id": 26013, "token": "\u0120Anyone", "merges": "\u0120Any one", "count": 282, "type": "remove by frequency"} -{"id": 47914, "token": "\u0120Sout", "merges": "\u0120S out", "count": 282, "type": "remove by frequency"} -{"id": 9496, "token": "}}^{", "merges": "}} ^{", "count": 282, "type": "remove by frequency"} -{"id": 28817, "token": "\u0120\u00e0\u00a4\u00b9", "merges": "\u0120\u00e0\u00a4 \u00b9", "count": 282, "type": "remove by frequency"} -{"id": 40237, "token": "\u00ce\u00bc\u00ce\u00bf", "merges": "\u00ce\u00bc \u00ce\u00bf", "count": 282, "type": "remove by frequency"} -{"id": 24449, "token": "\u0120\u00c3\u0125", "merges": "\u0120\u00c3 \u0125", "count": 282, "type": "remove by frequency"} -{"id": 35006, "token": "\u0120stromal", "merges": "\u0120strom al", "count": 283, "type": "remove by frequency"} -{"id": 50218, "token": "\u0120Ramos", "merges": "\u0120Ram os", "count": 283, "type": "remove by frequency"} -{"id": 22361, "token": "\u0120Higgs", "merges": "\u0120Hig gs", "count": 283, "type": "remove by frequency"} -{"id": 45714, "token": "\u0120Hogan", "merges": "\u0120H ogan", "count": 283, "type": "remove by frequency"} -{"id": 41692, "token": "\u0120Jude", "merges": "\u0120J ude", "count": 283, "type": "remove by frequency"} -{"id": 25703, "token": "})^{", "merges": "}) ^{", "count": 283, "type": "remove by frequency"} -{"id": 31040, "token": "}\\|", "merges": "}\\ |", "count": 283, "type": "remove by frequency"} -{"id": 49279, "token": "\u0120hypothalamus", "merges": "\u0120hypothal amus", "count": 284, "type": "remove by frequency"} -{"id": 36670, "token": "\u0120millilit", "merges": "\u0120mill ilit", "count": 284, "type": "remove by frequency"} -{"id": 24992, "token": "\u0120Judgment", "merges": "\u0120Jud gment", "count": 284, "type": "remove by frequency"} -{"id": 39206, "token": "\u0120Patricia", "merges": "\u0120Pat ricia", "count": 284, "type": "remove by frequency"} -{"id": 42167, "token": "\u0120Wendy", "merges": "\u0120Wend y", "count": 284, "type": "remove by frequency"} -{"id": 41456, "token": "))*-", "merges": ")) *-", "count": 284, "type": "remove by frequency"} -{"id": 23743, "token": "}$\\", "merges": "}$ \\", "count": 284, "type": "remove by frequency"} -{"id": 50280, "token": "\u00c9\u00a1", "merges": "\u00c9 \u00a1", "count": 284, "type": "remove by frequency"} -{"id": 30256, "token": "\u0120Parkinson", "merges": "\u0120Park inson", "count": 285, "type": "remove by frequency"} -{"id": 32762, "token": "\u0120Chev", "merges": "\u0120Che v", "count": 285, "type": "remove by frequency"} -{"id": 41742, "token": "\u0120UCLA", "merges": "\u0120U CLA", "count": 285, "type": "remove by frequency"} -{"id": 40934, "token": "\u0120Rash", "merges": "\u0120R ash", "count": 285, "type": "remove by frequency"} -{"id": 52025, "token": "\u00e6\u0131\u00b9", "merges": "\u00e6\u0131 \u00b9", "count": 285, "type": "remove by frequency"} -{"id": 34187, "token": "ClickListener", "merges": "Click Listener", "count": 286, "type": "remove by frequency"} -{"id": 30614, "token": "\u0120inactivation", "merges": "\u0120in activation", "count": 286, "type": "remove by frequency"} -{"id": 35702, "token": "\u0120Buddhism", "merges": "\u0120Buddh ism", "count": 286, "type": "remove by frequency"} -{"id": 45664, "token": "\u0120Allison", "merges": "\u0120All ison", "count": 286, "type": "remove by frequency"} -{"id": 43649, "token": "\u0120Guards", "merges": "\u0120Gu ards", "count": 286, "type": "remove by frequency"} -{"id": 40549, "token": "\u0120Maced", "merges": "\u0120M aced", "count": 286, "type": "remove by frequency"} -{"id": 43294, "token": "\u0120Revel", "merges": "\u0120Re vel", "count": 286, "type": "remove by frequency"} -{"id": 36825, "token": "ist\u00c3\u00a4", "merges": "ist \u00c3\u00a4", "count": 286, "type": "remove by frequency"} -{"id": 27977, "token": "\u0120QCD", "merges": "\u0120Q CD", "count": 286, "type": "remove by frequency"} -{"id": 45612, "token": "\u0120echocardiography", "merges": "\u0120echocardi ography", "count": 287, "type": "remove by frequency"} -{"id": 49339, "token": "\u0120Procedures", "merges": "\u0120Proced ures", "count": 287, "type": "remove by frequency"} -{"id": 34040, "token": "\u0120fermions", "merges": "\u0120ferm ions", "count": 287, "type": "remove by frequency"} -{"id": 48025, "token": "\u0120vesicle", "merges": "\u0120ves icle", "count": 287, "type": "remove by frequency"} -{"id": 49983, "token": "\u0120Manor", "merges": "\u0120Man or", "count": 287, "type": "remove by frequency"} -{"id": 33252, "token": "\u0120Anglo", "merges": "\u0120Angl o", "count": 287, "type": "remove by frequency"} -{"id": 37669, "token": "\u0120Rocky", "merges": "\u0120Rock y", "count": 287, "type": "remove by frequency"} -{"id": 32807, "token": "\u0120Teh", "merges": "\u0120Te h", "count": 287, "type": "remove by frequency"} -{"id": 24815, "token": "->_", "merges": "-> _", "count": 287, "type": "remove by frequency"} -{"id": 17990, "token": ")-\\", "merges": ") -\\", "count": 287, "type": "remove by frequency"} -{"id": 31939, "token": "\u0120phospholip", "merges": "\u0120phosph olip", "count": 288, "type": "remove by frequency"} -{"id": 22126, "token": "\u0120Vancouver", "merges": "\u0120V ancouver", "count": 288, "type": "remove by frequency"} -{"id": 29797, "token": "\u0120Southeast", "merges": "\u0120S outheast", "count": 288, "type": "remove by frequency"} -{"id": 43627, "token": "\u0120Violence", "merges": "\u0120Viol ence", "count": 288, "type": "remove by frequency"} -{"id": 32866, "token": "\u0120homotopy", "merges": "\u0120homot opy", "count": 288, "type": "remove by frequency"} -{"id": 47569, "token": "\u0120Geoffrey", "merges": "\u0120Geoff rey", "count": 288, "type": "remove by frequency"} -{"id": 34567, "token": "\u0120Antib", "merges": "\u0120Ant ib", "count": 288, "type": "remove by frequency"} -{"id": 30422, "token": "\u0120v\u00c3\u00a6", "merges": "\u0120v \u00c3\u00a6", "count": 288, "type": "remove by frequency"} -{"id": 46257, "token": "}),\\", "merges": "} ),\\", "count": 288, "type": "remove by frequency"} -{"id": 40589, "token": "\u0120FAA", "merges": "\u0120F AA", "count": 288, "type": "remove by frequency"} -{"id": 15892, "token": "\u0120^{", "merges": "\u0120 ^{", "count": 288, "type": "remove by frequency"} -{"id": 14748, "token": "\u0120Minnesota", "merges": "\u0120Minn esota", "count": 289, "type": "remove by frequency"} -{"id": 40548, "token": "\u0120epitope", "merges": "\u0120epit ope", "count": 289, "type": "remove by frequency"} -{"id": 46055, "token": "\u0120trypt", "merges": "\u0120try pt", "count": 289, "type": "remove by frequency"} -{"id": 31078, "token": "\u0120Hels", "merges": "\u0120H els", "count": 289, "type": "remove by frequency"} -{"id": 22537, "token": "\u0120immunohist", "merges": "\u0120immun ohist", "count": 290, "type": "remove by frequency"} -{"id": 16011, "token": "\u0120cytokines", "merges": "\u0120cytok ines", "count": 290, "type": "remove by frequency"} -{"id": 28260, "token": "\u0120Giants", "merges": "\u0120Gi ants", "count": 290, "type": "remove by frequency"} -{"id": 43738, "token": "iorari", "merges": "ior ari", "count": 290, "type": "remove by frequency"} -{"id": 40858, "token": "\u0120Laden", "merges": "\u0120L aden", "count": 290, "type": "remove by frequency"} -{"id": 32393, "token": "\u0120Seth", "merges": "\u0120S eth", "count": 290, "type": "remove by frequency"} -{"id": 50209, "token": "\u0120habl", "merges": "\u0120h abl", "count": 290, "type": "remove by frequency"} -{"id": 46664, "token": "\u0120KCl", "merges": "\u0120K Cl", "count": 290, "type": "remove by frequency"} -{"id": 48992, "token": "\u0120FEV", "merges": "\u0120F EV", "count": 290, "type": "remove by frequency"} -{"id": 13338, "token": "\u0120Colorado", "merges": "\u0120Color ado", "count": 291, "type": "remove by frequency"} -{"id": 49350, "token": "\u0120Alic", "merges": "\u0120A lic", "count": 291, "type": "remove by frequency"} -{"id": 49633, "token": ")}+", "merges": ")} +", "count": 291, "type": "remove by frequency"} -{"id": 37815, "token": "Drosophila", "merges": "D rosophila", "count": 292, "type": "remove by frequency"} -{"id": 6767, "token": "widet", "merges": "wid et", "count": 292, "type": "remove by frequency"} -{"id": 47408, "token": "\u0120Coy", "merges": "\u0120C oy", "count": 292, "type": "remove by frequency"} -{"id": 36030, "token": "\u00ce\u00bb\u00ce\u00b5", "merges": "\u00ce\u00bb \u00ce\u00b5", "count": 292, "type": "remove by frequency"} -{"id": 50355, "token": "\u00e2\u0138\u0124", "merges": "\u00e2\u0138 \u0124", "count": 292, "type": "remove by frequency"} -{"id": 41866, "token": "\u0120cerevisiae", "merges": "\u0120cere visiae", "count": 293, "type": "remove by frequency"} -{"id": 34674, "token": "Upsilon", "merges": "U psilon", "count": 293, "type": "remove by frequency"} -{"id": 48935, "token": "\u0120felon", "merges": "\u0120fel on", "count": 293, "type": "remove by frequency"} -{"id": 46737, "token": "\u0120Cain", "merges": "\u0120C ain", "count": 293, "type": "remove by frequency"} -{"id": 13090, "token": "\u0120\u00c3\u00aen", "merges": "\u0120\u00c3\u00ae n", "count": 293, "type": "remove by frequency"} -{"id": 41158, "token": "\u0120Cly", "merges": "\u0120C ly", "count": 293, "type": "remove by frequency"} -{"id": 37957, "token": "\u0120NPR", "merges": "\u0120N PR", "count": 293, "type": "remove by frequency"} -{"id": 33221, "token": "\u00ce\u00ae\u00cf\u0124", "merges": "\u00ce\u00ae \u00cf\u0124", "count": 293, "type": "remove by frequency"} -{"id": 36003, "token": "\u00cf\u0123\u00ce\u00b3", "merges": "\u00cf\u0123 \u00ce\u00b3", "count": 293, "type": "remove by frequency"} -{"id": 48329, "token": ")$)", "merges": ")$ )", "count": 293, "type": "remove by frequency"} -{"id": 9213, "token": "}+\\", "merges": "} +\\", "count": 293, "type": "remove by frequency"} -{"id": 40823, "token": "\u00c5\u00bee", "merges": "\u00c5\u00be e", "count": 293, "type": "remove by frequency"} -{"id": 30041, "token": "\u00c3\u013d", "merges": "\u00c3 \u013d", "count": 293, "type": "remove by frequency"} -{"id": 42557, "token": "\u0120politique", "merges": "\u0120polit ique", "count": 294, "type": "remove by frequency"} -{"id": 13115, "token": "\u0120Pakistan", "merges": "\u0120Pak istan", "count": 294, "type": "remove by frequency"} -{"id": 44810, "token": "\u0120estaba", "merges": "\u0120est aba", "count": 294, "type": "remove by frequency"} -{"id": 34410, "token": "\u0120Guinea", "merges": "\u0120Gu inea", "count": 294, "type": "remove by frequency"} -{"id": 46109, "token": "\u0120jamais", "merges": "\u0120j amais", "count": 294, "type": "remove by frequency"} -{"id": 43242, "token": "\u00cf\u0126\u00ce\u00ae", "merges": "\u00cf\u0126 \u00ce\u00ae", "count": 294, "type": "remove by frequency"} -{"id": 28270, "token": "\u0120Thirty", "merges": "\u0120Th irty", "count": 295, "type": "remove by frequency"} -{"id": 48055, "token": "\u0120Violet", "merges": "\u0120V iolet", "count": 295, "type": "remove by frequency"} -{"id": 30290, "token": "\u0120k\u00c3\u00b6nn", "merges": "\u0120k\u00c3\u00b6 nn", "count": 295, "type": "remove by frequency"} -{"id": 48239, "token": "\u0120nanow", "merges": "\u0120nan ow", "count": 295, "type": "remove by frequency"} -{"id": 44537, "token": "lbrace", "merges": "l brace", "count": 295, "type": "remove by frequency"} -{"id": 35288, "token": "\u0120Tumor", "merges": "\u0120T umor", "count": 295, "type": "remove by frequency"} -{"id": 16724, "token": "\u0120\u00e2\u0122\u0140", "merges": "\u0120\u00e2\u0122 \u0140", "count": 295, "type": "remove by frequency"} -{"id": 42579, "token": "\u0120Cricket", "merges": "\u0120Crick et", "count": 296, "type": "remove by frequency"} -{"id": 36788, "token": "\u0120Notch", "merges": "\u0120Not ch", "count": 296, "type": "remove by frequency"} -{"id": 40971, "token": "\u0120Nietzsche", "merges": "\u0120N ietzsche", "count": 297, "type": "remove by frequency"} -{"id": 44620, "token": "\u0120lysine", "merges": "\u0120lys ine", "count": 297, "type": "remove by frequency"} -{"id": 18636, "token": "^*$", "merges": "^* $", "count": 297, "type": "remove by frequency"} -{"id": 43727, "token": "\u0120hepatocellular", "merges": "\u0120hepat ocellular", "count": 298, "type": "remove by frequency"} -{"id": 49982, "token": "\u0120postseason", "merges": "\u0120post season", "count": 298, "type": "remove by frequency"} -{"id": 43854, "token": "\u0120ligation", "merges": "\u0120l igation", "count": 298, "type": "remove by frequency"} -{"id": 44786, "token": "\u0120Newport", "merges": "\u0120New port", "count": 298, "type": "remove by frequency"} -{"id": 47896, "token": "\u0120Cecil", "merges": "\u0120Cec il", "count": 298, "type": "remove by frequency"} -{"id": 41097, "token": "\u0120Hicks", "merges": "\u0120H icks", "count": 298, "type": "remove by frequency"} -{"id": 38557, "token": "\u0120Byz", "merges": "\u0120By z", "count": 298, "type": "remove by frequency"} -{"id": 50328, "token": "\u00e2\u012b\u00a1", "merges": "\u00e2\u012b \u00a1", "count": 298, "type": "remove by frequency"} -{"id": 47876, "token": "\u00c5\u0128", "merges": "\u00c5 \u0128", "count": 298, "type": "remove by frequency"} -{"id": 47640, "token": "otransferase", "merges": "otransfer ase", "count": 299, "type": "remove by frequency"} -{"id": 49734, "token": "\u0120testicular", "merges": "\u0120test icular", "count": 299, "type": "remove by frequency"} -{"id": 42985, "token": "\u0120Nursing", "merges": "\u0120Nurs ing", "count": 299, "type": "remove by frequency"} -{"id": 48448, "token": "\u0120excised", "merges": "\u0120exc ised", "count": 299, "type": "remove by frequency"} -{"id": 29183, "token": "\u0120Batman", "merges": "\u0120Bat man", "count": 299, "type": "remove by frequency"} -{"id": 26089, "token": "\u0120Nixon", "merges": "\u0120N ixon", "count": 299, "type": "remove by frequency"} -{"id": 23440, "token": "\u0120BASIS", "merges": "\u0120BAS IS", "count": 299, "type": "remove by frequency"} -{"id": 44950, "token": "\u0120Cumm", "merges": "\u0120C umm", "count": 299, "type": "remove by frequency"} -{"id": 39202, "token": "\u0120endometrial", "merges": "\u0120endomet rial", "count": 300, "type": "remove by frequency"} -{"id": 8731, "token": "setlength", "merges": "set length", "count": 300, "type": "remove by frequency"} -{"id": 46773, "token": "\u0120Roberto", "merges": "\u0120Rober to", "count": 300, "type": "remove by frequency"} -{"id": 16234, "token": "\u0120Mexican", "merges": "\u0120Mex ican", "count": 300, "type": "remove by frequency"} -{"id": 40448, "token": "\u0120Sexual", "merges": "\u0120Sex ual", "count": 300, "type": "remove by frequency"} -{"id": 46637, "token": "\u0120Sidney", "merges": "\u0120Sid ney", "count": 300, "type": "remove by frequency"} -{"id": 25731, "token": ":@\"", "merges": ": @\"", "count": 300, "type": "remove by frequency"} -{"id": 11466, "token": "\u00c2\u00b8", "merges": "\u00c2 \u00b8", "count": 300, "type": "remove by frequency"} -{"id": 24898, "token": "\u0120embodiments", "merges": "\u0120embod iments", "count": 301, "type": "remove by frequency"} -{"id": 41112, "token": "\u0120Sterling", "merges": "\u0120Ster ling", "count": 301, "type": "remove by frequency"} -{"id": 48162, "token": "onitrile", "merges": "on itrile", "count": 301, "type": "remove by frequency"} -{"id": 45746, "token": "\u0120Hodge", "merges": "\u0120H odge", "count": 301, "type": "remove by frequency"} -{"id": 43802, "token": "\u0120McKin", "merges": "\u0120McK in", "count": 301, "type": "remove by frequency"} -{"id": 49596, "token": "\u0120Nicol", "merges": "\u0120Nic ol", "count": 301, "type": "remove by frequency"} -{"id": 29164, "token": "\u0120Mais", "merges": "\u0120M ais", "count": 301, "type": "remove by frequency"} -{"id": 51977, "token": "\u00e6\u012f\u00b1", "merges": "\u00e6\u012f \u00b1", "count": 301, "type": "remove by frequency"} -{"id": 50307, "token": "\u00e2\u0124\u0125", "merges": "\u00e2\u0124 \u0125", "count": 301, "type": "remove by frequency"} -{"id": 49765, "token": "\u0120Emmanuel", "merges": "\u0120Em manuel", "count": 302, "type": "remove by frequency"} -{"id": 30638, "token": "\u0120Confed", "merges": "\u0120Conf ed", "count": 302, "type": "remove by frequency"} -{"id": 24607, "token": "\u0120Colomb", "merges": "\u0120Col omb", "count": 302, "type": "remove by frequency"} -{"id": 45014, "token": "\u0120Chloe", "merges": "\u0120Ch loe", "count": 302, "type": "remove by frequency"} -{"id": 27989, "token": "\u0120Tris", "merges": "\u0120Tr is", "count": 302, "type": "remove by frequency"} -{"id": 36669, "token": "\u0120Dana", "merges": "\u0120D ana", "count": 302, "type": "remove by frequency"} -{"id": 14808, "token": "\u00e2\u0122\u00af", "merges": "\u00e2\u0122 \u00af", "count": 302, "type": "remove by frequency"} -{"id": 25856, "token": "\u0120Kyle", "merges": "\u0120K yle", "count": 303, "type": "remove by frequency"} -{"id": 37710, "token": "\u0120\u00d9\u0123\u00d9\u012c", "merges": "\u0120\u00d9\u0123 \u00d9\u012c", "count": 303, "type": "remove by frequency"} -{"id": 32678, "token": "\u0120Thanksgiving", "merges": "\u0120Thanks giving", "count": 304, "type": "remove by frequency"} -{"id": 46048, "token": "\u0120autologous", "merges": "\u0120aut ologous", "count": 304, "type": "remove by frequency"} -{"id": 44340, "token": "\u0120Parties", "merges": "\u0120Part ies", "count": 304, "type": "remove by frequency"} -{"id": 49239, "token": "\u0120Newsp", "merges": "\u0120New sp", "count": 304, "type": "remove by frequency"} -{"id": 49871, "token": "\u0120Tours", "merges": "\u0120T ours", "count": 304, "type": "remove by frequency"} -{"id": 39345, "token": "\u0120Thir", "merges": "\u0120Th ir", "count": 304, "type": "remove by frequency"} -{"id": 48780, "token": "\u0120APA", "merges": "\u0120AP A", "count": 304, "type": "remove by frequency"} -{"id": 36737, "token": "\u0120Astrophys", "merges": "\u0120Ast rophys", "count": 305, "type": "remove by frequency"} -{"id": 25367, "token": "\u0120tyrosine", "merges": "\u0120ty rosine", "count": 305, "type": "remove by frequency"} -{"id": 19930, "token": "\u0120Europa", "merges": "\u0120Europ a", "count": 305, "type": "remove by frequency"} -{"id": 35666, "token": "\u0120decedent", "merges": "\u0120de cedent", "count": 306, "type": "remove by frequency"} -{"id": 41046, "token": "\u0120Breast", "merges": "\u0120Bre ast", "count": 306, "type": "remove by frequency"} -{"id": 48770, "token": "\u0120Territ", "merges": "\u0120Ter rit", "count": 306, "type": "remove by frequency"} -{"id": 48282, "token": "\u0120$$|", "merges": "\u0120$$ |", "count": 306, "type": "remove by frequency"} -{"id": 34441, "token": "\u0120\u00c3\u0126", "merges": "\u0120\u00c3 \u0126", "count": 306, "type": "remove by frequency"} -{"id": 36692, "token": "^{[", "merges": "^{ [", "count": 306, "type": "remove by frequency"} -{"id": 28976, "token": "\u0120biomarker", "merges": "\u0120biomark er", "count": 307, "type": "remove by frequency"} -{"id": 36195, "token": "Regarding", "merges": "Reg arding", "count": 307, "type": "remove by frequency"} -{"id": 23181, "token": "\u0120WARRANTY", "merges": "\u0120WARRANT Y", "count": 307, "type": "remove by frequency"} -{"id": 43735, "token": "\u0120ovary", "merges": "\u0120ov ary", "count": 307, "type": "remove by frequency"} -{"id": 47613, "token": "\u0120Irene", "merges": "\u0120I rene", "count": 307, "type": "remove by frequency"} -{"id": 48119, "token": "\u0120Fraz", "merges": "\u0120F raz", "count": 307, "type": "remove by frequency"} -{"id": 31959, "token": "\u0120Rebecca", "merges": "\u0120Re becca", "count": 308, "type": "remove by frequency"} -{"id": 36139, "token": "\u0120Isabel", "merges": "\u0120Is abel", "count": 308, "type": "remove by frequency"} -{"id": 49319, "token": "\u0120dette", "merges": "\u0120det te", "count": 308, "type": "remove by frequency"} -{"id": 43306, "token": "=\"$(", "merges": "=\" $(", "count": 308, "type": "remove by frequency"} -{"id": 46304, "token": "\u0120bioavailability", "merges": "\u0120bio availability", "count": 309, "type": "remove by frequency"} -{"id": 45187, "token": "\u0120Deutsche", "merges": "\u0120Deut sche", "count": 309, "type": "remove by frequency"} -{"id": 11481, "token": "\u0120Palest", "merges": "\u0120Pal est", "count": 309, "type": "remove by frequency"} -{"id": 32252, "token": "\u0120Casey", "merges": "\u0120Case y", "count": 309, "type": "remove by frequency"} -{"id": 41504, "token": "\u0120Eph", "merges": "\u0120E ph", "count": 309, "type": "remove by frequency"} -{"id": 8786, "token": "\u0120Republican", "merges": "\u0120Republic an", "count": 310, "type": "remove by frequency"} -{"id": 37446, "token": "////////////////////////////////////////////////////////////////", "merges": "//////////////////////////////// ////////////////////////////////", "count": 311, "type": "remove by frequency"} -{"id": 22356, "token": "\u0120Communist", "merges": "\u0120Commun ist", "count": 311, "type": "remove by frequency"} -{"id": 39431, "token": "\u0120Namely", "merges": "\u0120Nam ely", "count": 311, "type": "remove by frequency"} -{"id": 45481, "token": "\u0120erythe", "merges": "\u0120ery the", "count": 311, "type": "remove by frequency"} -{"id": 43634, "token": "\u0120Byron", "merges": "\u0120By ron", "count": 311, "type": "remove by frequency"} -{"id": 45431, "token": "\u0120\u00c3\u00a9p", "merges": "\u0120\u00c3\u00a9 p", "count": 311, "type": "remove by frequency"} -{"id": 33358, "token": "\u00e0\u00a5\u012a", "merges": "\u00e0\u00a5 \u012a", "count": 311, "type": "remove by frequency"} -{"id": 39889, "token": "\u0120misdemeanor", "merges": "\u0120misdem eanor", "count": 312, "type": "remove by frequency"} -{"id": 23498, "token": "\u0120Especially", "merges": "\u0120Es pecially", "count": 312, "type": "remove by frequency"} -{"id": 18383, "token": "\u0120\u00ce\u00ba\u00ce\u00b1", "merges": "\u0120\u00ce\u00ba \u00ce\u00b1", "count": 312, "type": "remove by frequency"} -{"id": 34738, "token": "\u0120Kitt", "merges": "\u0120K itt", "count": 312, "type": "remove by frequency"} -{"id": 41124, "token": "Differentiate", "merges": "Different iate", "count": 313, "type": "remove by frequency"} -{"id": 13786, "token": "\u0120Petition", "merges": "\u0120P etition", "count": 313, "type": "remove by frequency"} -{"id": 22699, "token": "\u0120Puerto", "merges": "\u0120Pu erto", "count": 313, "type": "remove by frequency"} -{"id": 48126, "token": "\u0120Lawson", "merges": "\u0120Law son", "count": 313, "type": "remove by frequency"} -{"id": 44384, "token": "\u0120Tukey", "merges": "\u0120Tu key", "count": 313, "type": "remove by frequency"} -{"id": 31472, "token": "\u0120Jesse", "merges": "\u0120Jes se", "count": 313, "type": "remove by frequency"} -{"id": 29452, "token": "\u0120Zion", "merges": "\u0120Z ion", "count": 313, "type": "remove by frequency"} -{"id": 44535, "token": "\u0120follicles", "merges": "\u0120follic les", "count": 314, "type": "remove by frequency"} -{"id": 38929, "token": "\u00ce\u00bb\u00ce\u00b7", "merges": "\u00ce\u00bb \u00ce\u00b7", "count": 314, "type": "remove by frequency"} -{"id": 34556, "token": "\u00e0\u00ba", "merges": "\u00e0 \u00ba", "count": 314, "type": "remove by frequency"} -{"id": 46646, "token": "\u0120Lipschitz", "merges": "\u0120Lips chitz", "count": 315, "type": "remove by frequency"} -{"id": 25223, "token": "\u0120protease", "merges": "\u0120prote ase", "count": 315, "type": "remove by frequency"} -{"id": 45729, "token": "\u0120Hammond", "merges": "\u0120Hamm ond", "count": 315, "type": "remove by frequency"} -{"id": 42005, "token": "\u0120Landau", "merges": "\u0120Land au", "count": 315, "type": "remove by frequency"} -{"id": 19987, "token": "\u0120Pitts", "merges": "\u0120Pitt s", "count": 315, "type": "remove by frequency"} -{"id": 39979, "token": "\u0120Wheat", "merges": "\u0120Whe at", "count": 315, "type": "remove by frequency"} -{"id": 49626, "token": "\u00ce\u00b8\u00ce\u00b7", "merges": "\u00ce\u00b8 \u00ce\u00b7", "count": 315, "type": "remove by frequency"} -{"id": 25990, "token": "\u0120Edinburgh", "merges": "\u0120Ed inburgh", "count": 316, "type": "remove by frequency"} -{"id": 35512, "token": "\u0120Mercedes", "merges": "\u0120Mer cedes", "count": 316, "type": "remove by frequency"} -{"id": 38436, "token": "\u0120Santiago", "merges": "\u0120S antiago", "count": 316, "type": "remove by frequency"} -{"id": 16700, "token": "\u0120Dallas", "merges": "\u0120D allas", "count": 316, "type": "remove by frequency"} -{"id": 41106, "token": "\u0120hecho", "merges": "\u0120he cho", "count": 316, "type": "remove by frequency"} -{"id": 41825, "token": "^).", "merges": "^ ).", "count": 316, "type": "remove by frequency"} -{"id": 47630, "token": "\u00c3\u00aet", "merges": "\u00c3\u00ae t", "count": 316, "type": "remove by frequency"} -{"id": 15667, "token": "$^{", "merges": "$ ^{", "count": 316, "type": "remove by frequency"} -{"id": 49429, "token": "\u0120hypersensitivity", "merges": "\u0120hypers ensitivity", "count": 317, "type": "remove by frequency"} -{"id": 41396, "token": "\u0120Contributions", "merges": "\u0120Cont ributions", "count": 317, "type": "remove by frequency"} -{"id": 28342, "token": "opathological", "merges": "opath ological", "count": 317, "type": "remove by frequency"} -{"id": 31935, "token": "\u0120Institutes", "merges": "\u0120Instit utes", "count": 317, "type": "remove by frequency"} -{"id": 40801, "token": "Arabidopsis", "merges": "Arab idopsis", "count": 317, "type": "remove by frequency"} -{"id": 37930, "token": "\u0120Nicole", "merges": "\u0120Nic ole", "count": 317, "type": "remove by frequency"} -{"id": 26865, "token": "\u0120NCAA", "merges": "\u0120NC AA", "count": 317, "type": "remove by frequency"} -{"id": 45357, "token": "\u0120cerebellar", "merges": "\u0120cereb ellar", "count": 318, "type": "remove by frequency"} -{"id": 38901, "token": "menopausal", "merges": "men opausal", "count": 318, "type": "remove by frequency"} -{"id": 39591, "token": "\u0120aberrant", "merges": "\u0120aberr ant", "count": 318, "type": "remove by frequency"} -{"id": 45041, "token": "\u0120Bolton", "merges": "\u0120Bol ton", "count": 318, "type": "remove by frequency"} -{"id": 35262, "token": "\u0120Sophie", "merges": "\u0120Soph ie", "count": 318, "type": "remove by frequency"} -{"id": 38297, "token": "\u0120Sevent", "merges": "\u0120Se vent", "count": 318, "type": "remove by frequency"} -{"id": 49958, "token": "\u0120surgically", "merges": "\u0120surg ically", "count": 319, "type": "remove by frequency"} -{"id": 43954, "token": "Certainly", "merges": "C ertainly", "count": 319, "type": "remove by frequency"} -{"id": 32000, "token": "\u0120Munich", "merges": "\u0120Mun ich", "count": 319, "type": "remove by frequency"} -{"id": 25862, "token": "FFIR", "merges": "FF IR", "count": 319, "type": "remove by frequency"} -{"id": 45114, "token": "]}$", "merges": "] }$", "count": 319, "type": "remove by frequency"} -{"id": 48914, "token": "\u0120\u00d8\u00a5", "merges": "\u0120\u00d8 \u00a5", "count": 319, "type": "remove by frequency"} -{"id": 37754, "token": "\u0120heterozygous", "merges": "\u0120heter ozygous", "count": 320, "type": "remove by frequency"} -{"id": 36365, "token": "\u0120Squadron", "merges": "\u0120Squad ron", "count": 320, "type": "remove by frequency"} -{"id": 45124, "token": "\u0120Drugs", "merges": "\u0120Drug s", "count": 320, "type": "remove by frequency"} -{"id": 25748, "token": "ofluorescence", "merges": "oflu orescence", "count": 321, "type": "remove by frequency"} -{"id": 38873, "token": "\u0120preclinical", "merges": "\u0120pre clinical", "count": 321, "type": "remove by frequency"} -{"id": 41079, "token": "\u0120Religious", "merges": "\u0120Rel igious", "count": 321, "type": "remove by frequency"} -{"id": 47356, "token": "\u0120proffered", "merges": "\u0120prof fered", "count": 321, "type": "remove by frequency"} -{"id": 34655, "token": "\u0120Caroline", "merges": "\u0120Carol ine", "count": 321, "type": "remove by frequency"} -{"id": 46074, "token": "\u0120papill", "merges": "\u0120pap ill", "count": 321, "type": "remove by frequency"} -{"id": 46151, "token": "\u0120Injury", "merges": "\u0120In jury", "count": 321, "type": "remove by frequency"} -{"id": 16146, "token": "\u0120Miami", "merges": "\u0120Mi ami", "count": 321, "type": "remove by frequency"} -{"id": 17870, "token": "\u0120APPE", "merges": "\u0120AP PE", "count": 321, "type": "remove by frequency"} -{"id": 50301, "token": "\u00e2\u0122\u00bf", "merges": "\u00e2\u0122 \u00bf", "count": 321, "type": "remove by frequency"} -{"id": 41604, "token": "}]$", "merges": "} ]$", "count": 321, "type": "remove by frequency"} -{"id": 39358, "token": "\u0120renormalization", "merges": "\u0120renormal ization", "count": 322, "type": "remove by frequency"} -{"id": 43787, "token": "\u0120photosensitive", "merges": "\u0120photos ensitive", "count": 322, "type": "remove by frequency"} -{"id": 44085, "token": "\u0120Connor", "merges": "\u0120Con nor", "count": 322, "type": "remove by frequency"} -{"id": 49536, "token": "\u0120Sutton", "merges": "\u0120S utton", "count": 322, "type": "remove by frequency"} -{"id": 31913, "token": "\u0120Antar", "merges": "\u0120Ant ar", "count": 322, "type": "remove by frequency"} -{"id": 49150, "token": "\u0120Leigh", "merges": "\u0120Le igh", "count": 322, "type": "remove by frequency"} -{"id": 49798, "token": "\u00cf\u0123\u00cf\u0129", "merges": "\u00cf\u0123 \u00cf\u0129", "count": 322, "type": "remove by frequency"} -{"id": 29830, "token": "\u0120\u00ce\u0137", "merges": "\u0120\u00ce \u0137", "count": 322, "type": "remove by frequency"} -{"id": 39588, "token": "\u0120progesterone", "merges": "\u0120pro gesterone", "count": 323, "type": "remove by frequency"} -{"id": 30545, "token": "\u0120sexuality", "merges": "\u0120sexual ity", "count": 323, "type": "remove by frequency"} -{"id": 25380, "token": "\u0120Richmond", "merges": "\u0120Rich mond", "count": 323, "type": "remove by frequency"} -{"id": 39102, "token": "\u0120Volks", "merges": "\u0120Vol ks", "count": 323, "type": "remove by frequency"} -{"id": 34539, "token": "\u0120Armed", "merges": "\u0120Arm ed", "count": 323, "type": "remove by frequency"} -{"id": 46888, "token": "\u0120Schematic", "merges": "\u0120Sc hematic", "count": 324, "type": "remove by frequency"} -{"id": 48408, "token": "\u0120Jennings", "merges": "\u0120Jenn ings", "count": 324, "type": "remove by frequency"} -{"id": 30036, "token": "mathtt", "merges": "mat htt", "count": 324, "type": "remove by frequency"} -{"id": 38366, "token": "\u0120\u00d1\u0123\u00d0\u00b5", "merges": "\u0120\u00d1\u0123 \u00d0\u00b5", "count": 324, "type": "remove by frequency"} -{"id": 39535, "token": "\u0120Buc", "merges": "\u0120B uc", "count": 324, "type": "remove by frequency"} -{"id": 31741, "token": "\u0120RCT", "merges": "\u0120R CT", "count": 324, "type": "remove by frequency"} -{"id": 33604, "token": "\u0120\u00c5\u00bce", "merges": "\u0120\u00c5\u00bc e", "count": 324, "type": "remove by frequency"} -{"id": 50386, "token": "\u00e3\u0122\u0127", "merges": "\u00e3\u0122 \u0127", "count": 324, "type": "remove by frequency"} -{"id": 48438, "token": "\u0120Excellent", "merges": "\u0120Ex cellent", "count": 325, "type": "remove by frequency"} -{"id": 27447, "token": "oprecip", "merges": "op recip", "count": 325, "type": "remove by frequency"} -{"id": 28979, "token": "\u0120Hallow", "merges": "\u0120Hall ow", "count": 325, "type": "remove by frequency"} -{"id": 32509, "token": "\u0120Gerald", "merges": "\u0120G erald", "count": 325, "type": "remove by frequency"} -{"id": 35460, "token": "\u0120THEY", "merges": "\u0120THE Y", "count": 325, "type": "remove by frequency"} -{"id": 43342, "token": "\u0120Cobb", "merges": "\u0120C obb", "count": 325, "type": "remove by frequency"} -{"id": 45098, "token": "printStackTrace", "merges": "print StackTrace", "count": 326, "type": "remove by frequency"} -{"id": 44924, "token": "\u0120reperfusion", "merges": "\u0120reper fusion", "count": 326, "type": "remove by frequency"} -{"id": 35120, "token": "\u0120immunoglob", "merges": "\u0120immun oglob", "count": 326, "type": "remove by frequency"} -{"id": 42991, "token": "\u0120Pinterest", "merges": "\u0120P interest", "count": 326, "type": "remove by frequency"} -{"id": 38881, "token": "\u0120Phillip", "merges": "\u0120Phill ip", "count": 326, "type": "remove by frequency"} -{"id": 45358, "token": "\u0120Prayer", "merges": "\u0120Pray er", "count": 326, "type": "remove by frequency"} -{"id": 30016, "token": "\u0120Jama", "merges": "\u0120J ama", "count": 326, "type": "remove by frequency"} -{"id": 44415, "token": "\u0120Heck", "merges": "\u0120He ck", "count": 326, "type": "remove by frequency"} -{"id": 43399, "token": "\u0120SUCH", "merges": "\u0120SU CH", "count": 326, "type": "remove by frequency"} -{"id": 35691, "token": "\u0120\u00cf\u0122\u00ce\u00b1", "merges": "\u0120\u00cf\u0122 \u00ce\u00b1", "count": 326, "type": "remove by frequency"} -{"id": 50005, "token": "\u0120Tud", "merges": "\u0120T ud", "count": 326, "type": "remove by frequency"} -{"id": 33750, "token": "\u0120.=", "merges": "\u0120. =", "count": 326, "type": "remove by frequency"} -{"id": 31127, "token": "\u0120Glasgow", "merges": "\u0120Gl asgow", "count": 327, "type": "remove by frequency"} -{"id": 34412, "token": "\u0120Brigade", "merges": "\u0120Brig ade", "count": 327, "type": "remove by frequency"} -{"id": 38809, "token": "\u0120Twelve", "merges": "\u0120Tw elve", "count": 327, "type": "remove by frequency"} -{"id": 49126, "token": "\u0120Docket", "merges": "\u0120D ocket", "count": 327, "type": "remove by frequency"} -{"id": 27850, "token": "\u0120Milan", "merges": "\u0120Mil an", "count": 327, "type": "remove by frequency"} -{"id": 37800, "token": "opically", "merges": "op ically", "count": 328, "type": "remove by frequency"} -{"id": 42171, "token": "\u0120Vernon", "merges": "\u0120Vern on", "count": 328, "type": "remove by frequency"} -{"id": 44070, "token": "\u0120___,", "merges": "\u0120___ ,", "count": 328, "type": "remove by frequency"} -{"id": 31038, "token": "\u0120\u00e2\u0122\u0142", "merges": "\u0120\u00e2\u0122 \u0142", "count": 328, "type": "remove by frequency"} -{"id": 48872, "token": "\u0120OCD", "merges": "\u0120O CD", "count": 328, "type": "remove by frequency"} -{"id": 36781, "token": "\u0120Socialist", "merges": "\u0120Social ist", "count": 329, "type": "remove by frequency"} -{"id": 26323, "token": "\u0120tambi\u00c3\u00a9n", "merges": "\u0120tamb i\u00c3\u00a9n", "count": 329, "type": "remove by frequency"} -{"id": 42305, "token": "\u0120Derby", "merges": "\u0120Der by", "count": 329, "type": "remove by frequency"} -{"id": 42944, "token": "]{};", "merges": "]{} ;", "count": 329, "type": "remove by frequency"} -{"id": 36686, "token": "\u0120Quantitative", "merges": "\u0120Quant itative", "count": 330, "type": "remove by frequency"} -{"id": 30617, "token": "\u0120mucosal", "merges": "\u0120muc osal", "count": 330, "type": "remove by frequency"} -{"id": 44495, "token": "\u0120Jerome", "merges": "\u0120Jer ome", "count": 330, "type": "remove by frequency"} -{"id": 36912, "token": "\u0120M\u00c3\u00a9", "merges": "\u0120M \u00c3\u00a9", "count": 330, "type": "remove by frequency"} -{"id": 33465, "token": "\u0120\u00e0\u00a4\u00b8", "merges": "\u0120\u00e0\u00a4 \u00b8", "count": 330, "type": "remove by frequency"} -{"id": 40307, "token": "\u0120EBV", "merges": "\u0120E BV", "count": 330, "type": "remove by frequency"} -{"id": 46802, "token": "*>(", "merges": "* >(", "count": 330, "type": "remove by frequency"} -{"id": 49796, "token": "\u0120corticosteroids", "merges": "\u0120corticoster oids", "count": 331, "type": "remove by frequency"} -{"id": 19802, "token": "\u0120Cleveland", "merges": "\u0120C leveland", "count": 331, "type": "remove by frequency"} -{"id": 10737, "token": "\u0120Amendment", "merges": "\u0120Am endment", "count": 331, "type": "remove by frequency"} -{"id": 39191, "token": "\u0120yogurt", "merges": "\u0120yog urt", "count": 331, "type": "remove by frequency"} -{"id": 38625, "token": "\u0120Fifty", "merges": "\u0120Fif ty", "count": 331, "type": "remove by frequency"} -{"id": 40537, "token": "\u0120Erik", "merges": "\u0120E rik", "count": 331, "type": "remove by frequency"} -{"id": 18679, "token": "\u0120s\u00c3\u00a5", "merges": "\u0120s \u00c3\u00a5", "count": 331, "type": "remove by frequency"} -{"id": 46817, "token": "\u0120Ramsey", "merges": "\u0120Ram sey", "count": 332, "type": "remove by frequency"} -{"id": 25164, "token": "\u0120Sixth", "merges": "\u0120Six th", "count": 332, "type": "remove by frequency"} -{"id": 43516, "token": "\u0120Oslo", "merges": "\u0120O slo", "count": 332, "type": "remove by frequency"} -{"id": 41489, "token": "otherapeutic", "merges": "othe rapeutic", "count": 333, "type": "remove by frequency"} -{"id": 35948, "token": "\u0120peritoneal", "merges": "\u0120per itoneal", "count": 333, "type": "remove by frequency"} -{"id": 26529, "token": "\u0120Testament", "merges": "\u0120Test ament", "count": 333, "type": "remove by frequency"} -{"id": 50087, "token": "\u0120Thornton", "merges": "\u0120Thorn ton", "count": 333, "type": "remove by frequency"} -{"id": 34071, "token": "\u0120glycerol", "merges": "\u0120glycer ol", "count": 333, "type": "remove by frequency"} -{"id": 47651, "token": "\u0120Managing", "merges": "\u0120Man aging", "count": 333, "type": "remove by frequency"} -{"id": 47589, "token": "\u0120genom", "merges": "\u0120gen om", "count": 333, "type": "remove by frequency"} -{"id": 22974, "token": "\\!\\", "merges": "\\! \\", "count": 333, "type": "remove by frequency"} -{"id": 40102, "token": "\u0120histology", "merges": "\u0120hist ology", "count": 334, "type": "remove by frequency"} -{"id": 7108, "token": "\u0120Americans", "merges": "\u0120Americ ans", "count": 334, "type": "remove by frequency"} -{"id": 46388, "token": "\u0120pleural", "merges": "\u0120ple ural", "count": 334, "type": "remove by frequency"} -{"id": 37601, "token": "\u0120Britt", "merges": "\u0120Br itt", "count": 334, "type": "remove by frequency"} -{"id": 47784, "token": "\u0120Nurse", "merges": "\u0120N urse", "count": 334, "type": "remove by frequency"} -{"id": 14986, "token": "\u0120\u00ce\u00bcg", "merges": "\u0120\u00ce\u00bc g", "count": 334, "type": "remove by frequency"} -{"id": 13061, "token": "_{(", "merges": "_{ (", "count": 334, "type": "remove by frequency"} -{"id": 15097, "token": "\u00c3\u013b", "merges": "\u00c3 \u013b", "count": 334, "type": "remove by frequency"} -{"id": 24163, "token": "Competing", "merges": "Comp eting", "count": 335, "type": "remove by frequency"} -{"id": 15812, "token": "\u0120Maryland", "merges": "\u0120Mary land", "count": 335, "type": "remove by frequency"} -{"id": 43522, "token": "\u0120Randall", "merges": "\u0120Rand all", "count": 335, "type": "remove by frequency"} -{"id": 44311, "token": "\u0120quien", "merges": "\u0120qu ien", "count": 335, "type": "remove by frequency"} -{"id": 49614, "token": "Docket", "merges": "D ocket", "count": 335, "type": "remove by frequency"} -{"id": 46533, "token": "\u0120CLAIM", "merges": "\u0120CLA IM", "count": 335, "type": "remove by frequency"} -{"id": 44484, "token": "\u0120Sart", "merges": "\u0120S art", "count": 335, "type": "remove by frequency"} -{"id": 9929, "token": "\u0120$-", "merges": "\u0120$ -", "count": 335, "type": "remove by frequency"} -{"id": 40469, "token": "\u0120immunological", "merges": "\u0120immun ological", "count": 336, "type": "remove by frequency"} -{"id": 42793, "token": "\u0120prefrontal", "merges": "\u0120pre frontal", "count": 336, "type": "remove by frequency"} -{"id": 34427, "token": "osexuality", "merges": "osex uality", "count": 336, "type": "remove by frequency"} -{"id": 46596, "token": "\u0120particul", "merges": "\u0120partic ul", "count": 336, "type": "remove by frequency"} -{"id": 41547, "token": "\u0120Speaking", "merges": "\u0120Spe aking", "count": 336, "type": "remove by frequency"} -{"id": 47420, "token": "\u0120Hassan", "merges": "\u0120Hass an", "count": 336, "type": "remove by frequency"} -{"id": 32519, "token": "\u0120juror", "merges": "\u0120jur or", "count": 336, "type": "remove by frequency"} -{"id": 47662, "token": "\u0120Lah", "merges": "\u0120L ah", "count": 336, "type": "remove by frequency"} -{"id": 13566, "token": "^{+", "merges": "^{ +", "count": 336, "type": "remove by frequency"} -{"id": 35838, "token": "Acknowledgements", "merges": "Acknowled gements", "count": 337, "type": "remove by frequency"} -{"id": 24129, "token": "\u0120Hampshire", "merges": "\u0120Ham pshire", "count": 337, "type": "remove by frequency"} -{"id": 44423, "token": "\u0120radially", "merges": "\u0120rad ially", "count": 337, "type": "remove by frequency"} -{"id": 31113, "token": "\u0120fibrin", "merges": "\u0120fibr in", "count": 337, "type": "remove by frequency"} -{"id": 39864, "token": "\u0120amyg", "merges": "\u0120am yg", "count": 337, "type": "remove by frequency"} -{"id": 29092, "token": "\u0120Joshua", "merges": "\u0120Josh ua", "count": 338, "type": "remove by frequency"} -{"id": 32425, "token": "\u0120Lesser", "merges": "\u0120Less er", "count": 338, "type": "remove by frequency"} -{"id": 39370, "token": "\u0120Kenny", "merges": "\u0120K enny", "count": 338, "type": "remove by frequency"} -{"id": 44193, "token": "\u00c3\u00a1nd", "merges": "\u00c3\u00a1 nd", "count": 338, "type": "remove by frequency"} -{"id": 33948, "token": "\u0120RBC", "merges": "\u0120R BC", "count": 338, "type": "remove by frequency"} -{"id": 30299, "token": "\u0120Citizens", "merges": "\u0120Cit izens", "count": 339, "type": "remove by frequency"} -{"id": 28244, "token": "\u0120Admiral", "merges": "\u0120Adm iral", "count": 339, "type": "remove by frequency"} -{"id": 38922, "token": "\u0120tachy", "merges": "\u0120t achy", "count": 339, "type": "remove by frequency"} -{"id": 23894, "token": "\u0120Uncle", "merges": "\u0120Un cle", "count": 339, "type": "remove by frequency"} -{"id": 22206, "token": "\u00cf\u0126\u00ce\u00b9\u00ce\u00ba", "merges": "\u00cf\u0126 \u00ce\u00b9\u00ce\u00ba", "count": 339, "type": "remove by frequency"} -{"id": 45921, "token": "\u0120USDA", "merges": "\u0120US DA", "count": 339, "type": "remove by frequency"} -{"id": 44836, "token": "\u0120habe", "merges": "\u0120hab e", "count": 339, "type": "remove by frequency"} -{"id": 18391, "token": "\u00e2\u012a\u0139", "merges": "\u00e2\u012a \u0139", "count": 339, "type": "remove by frequency"} -{"id": 41259, "token": "othelium", "merges": "othe lium", "count": 340, "type": "remove by frequency"} -{"id": 31534, "token": "\u0120Persian", "merges": "\u0120Pers ian", "count": 340, "type": "remove by frequency"} -{"id": 45777, "token": "\u0120tibial", "merges": "\u0120tib ial", "count": 340, "type": "remove by frequency"} -{"id": 45790, "token": "\u0120Olivia", "merges": "\u0120Ol ivia", "count": 340, "type": "remove by frequency"} -{"id": 23920, "token": "\u0120Carib", "merges": "\u0120Car ib", "count": 340, "type": "remove by frequency"} -{"id": 15207, "token": ")^{\\", "merges": ") ^{\\", "count": 340, "type": "remove by frequency"} -{"id": 43378, "token": "\u0120MTT", "merges": "\u0120M TT", "count": 340, "type": "remove by frequency"} -{"id": 41216, "token": "\u0120Abe", "merges": "\u0120A be", "count": 340, "type": "remove by frequency"} -{"id": 50656, "token": "\u00e5\u0124\u013c", "merges": "\u00e5\u0124 \u013c", "count": 340, "type": "remove by frequency"} -{"id": 49234, "token": "\u0120Hastings", "merges": "\u0120H astings", "count": 341, "type": "remove by frequency"} -{"id": 27899, "token": "\u0120Columbus", "merges": "\u0120Columb us", "count": 341, "type": "remove by frequency"} -{"id": 33808, "token": "\u0120Seventh", "merges": "\u0120Se venth", "count": 341, "type": "remove by frequency"} -{"id": 46262, "token": "\u0120Dalton", "merges": "\u0120Dal ton", "count": 341, "type": "remove by frequency"} -{"id": 29545, "token": "ligt", "merges": "lig t", "count": 341, "type": "remove by frequency"} -{"id": 51125, "token": "\u00e5\u0138\u00ab", "merges": "\u00e5\u0138 \u00ab", "count": 341, "type": "remove by frequency"} -{"id": 50356, "token": "\u00e2\u0138\u0125", "merges": "\u00e2\u0138 \u0125", "count": 341, "type": "remove by frequency"} -{"id": 49192, "token": "\u0120inactivated", "merges": "\u0120in activated", "count": 342, "type": "remove by frequency"} -{"id": 22592, "token": "\u0120monoclonal", "merges": "\u0120mon oclonal", "count": 342, "type": "remove by frequency"} -{"id": 40052, "token": "\u0120judicata", "merges": "\u0120judic ata", "count": 342, "type": "remove by frequency"} -{"id": 13169, "token": "\u0120Arizona", "merges": "\u0120Ari zona", "count": 342, "type": "remove by frequency"} -{"id": 39944, "token": "\u0120Vitamin", "merges": "\u0120Vit amin", "count": 342, "type": "remove by frequency"} -{"id": 47429, "token": "\u0120Fargo", "merges": "\u0120F argo", "count": 342, "type": "remove by frequency"} -{"id": 31071, "token": "\u0120Winn", "merges": "\u0120W inn", "count": 342, "type": "remove by frequency"} -{"id": 16595, "token": "\u0120\u00e2\u0122\u013e[", "merges": "\u0120\u00e2\u0122\u013e [", "count": 342, "type": "remove by frequency"} -{"id": 21433, "token": "\u0120\u00e0\u00a4\u0137", "merges": "\u0120\u00e0\u00a4 \u0137", "count": 342, "type": "remove by frequency"} -{"id": 38083, "token": "\u00c3\u00a4s", "merges": "\u00c3\u00a4 s", "count": 342, "type": "remove by frequency"} -{"id": 19002, "token": "})=", "merges": "}) =", "count": 342, "type": "remove by frequency"} -{"id": 30068, "token": "\u0120Regiment", "merges": "\u0120Reg iment", "count": 343, "type": "remove by frequency"} -{"id": 36327, "token": "\u0120heparin", "merges": "\u0120hepar in", "count": 343, "type": "remove by frequency"} -{"id": 25936, "token": "\u0120Redist", "merges": "\u0120Red ist", "count": 343, "type": "remove by frequency"} -{"id": 19141, "token": "cancers", "merges": "c ancers", "count": 343, "type": "remove by frequency"} -{"id": 37762, "token": "\u0120leurs", "merges": "\u0120le urs", "count": 343, "type": "remove by frequency"} -{"id": 23102, "token": "}},\\", "merges": "}} ,\\", "count": 343, "type": "remove by frequency"} -{"id": 48723, "token": "\u0120dysplasia", "merges": "\u0120dys plasia", "count": 344, "type": "remove by frequency"} -{"id": 48771, "token": "\u0120Bangkok", "merges": "\u0120Bang kok", "count": 344, "type": "remove by frequency"} -{"id": 33654, "token": "\u0120keratin", "merges": "\u0120ker atin", "count": 344, "type": "remove by frequency"} -{"id": 48982, "token": "\u0120Humans", "merges": "\u0120Hum ans", "count": 344, "type": "remove by frequency"} -{"id": 43795, "token": "\u0120Ordin", "merges": "\u0120Or din", "count": 344, "type": "remove by frequency"} -{"id": 46863, "token": "rogens", "merges": "rog ens", "count": 344, "type": "remove by frequency"} -{"id": 45641, "token": "\u0120toim", "merges": "\u0120to im", "count": 344, "type": "remove by frequency"} -{"id": 45557, "token": "\u0120Cous", "merges": "\u0120C ous", "count": 344, "type": "remove by frequency"} -{"id": 29398, "token": "\u0120adjuvant", "merges": "\u0120adj uvant", "count": 345, "type": "remove by frequency"} -{"id": 39498, "token": "\u0120salivary", "merges": "\u0120saliv ary", "count": 345, "type": "remove by frequency"} -{"id": 48465, "token": "\u0120Biomed", "merges": "\u0120Bi omed", "count": 345, "type": "remove by frequency"} -{"id": 32577, "token": "\u0120Sally", "merges": "\u0120S ally", "count": 345, "type": "remove by frequency"} -{"id": 24056, "token": "\u0120jeg", "merges": "\u0120j eg", "count": 345, "type": "remove by frequency"} -{"id": 39042, "token": "\u0120apparatuses", "merges": "\u0120apparatus es", "count": 346, "type": "remove by frequency"} -{"id": 45706, "token": "\u0120Analyses", "merges": "\u0120Anal yses", "count": 346, "type": "remove by frequency"} -{"id": 42707, "token": "\u0120Evangel", "merges": "\u0120Ev angel", "count": 346, "type": "remove by frequency"} -{"id": 35502, "token": "\u0120Divine", "merges": "\u0120Div ine", "count": 346, "type": "remove by frequency"} -{"id": 32164, "token": "\u0120Schr", "merges": "\u0120Sch r", "count": 346, "type": "remove by frequency"} -{"id": 32726, "token": "\u0120Mitt", "merges": "\u0120M itt", "count": 346, "type": "remove by frequency"} -{"id": 42767, "token": "]{\\", "merges": "] {\\", "count": 346, "type": "remove by frequency"} -{"id": 48357, "token": "\u0120Casual", "merges": "\u0120Cas ual", "count": 347, "type": "remove by frequency"} -{"id": 40476, "token": "\u0120fecal", "merges": "\u0120fe cal", "count": 347, "type": "remove by frequency"} -{"id": 37144, "token": "\u0120Evan", "merges": "\u0120Ev an", "count": 347, "type": "remove by frequency"} -{"id": 42586, "token": "\u0120Dans", "merges": "\u0120D ans", "count": 347, "type": "remove by frequency"} -{"id": 27079, "token": "\u0120COPD", "merges": "\u0120COP D", "count": 347, "type": "remove by frequency"} -{"id": 30630, "token": "\u0120Stephan", "merges": "\u0120Ste phan", "count": 348, "type": "remove by frequency"} -{"id": 29495, "token": "\u0120\u00ce\u00b5\u00cf\u0122", "merges": "\u0120\u00ce\u00b5 \u00cf\u0122", "count": 348, "type": "remove by frequency"} -{"id": 33519, "token": "\u0120Suff", "merges": "\u0120S uff", "count": 348, "type": "remove by frequency"} -{"id": 31479, "token": "\u0120Liz", "merges": "\u0120L iz", "count": 348, "type": "remove by frequency"} -{"id": 42512, "token": "\u0120GSH", "merges": "\u0120G SH", "count": 348, "type": "remove by frequency"} -{"id": 50338, "token": "\u00e2\u0136\u0127", "merges": "\u00e2\u0136 \u0127", "count": 348, "type": "remove by frequency"} -{"id": 24394, "token": "\u0120histological", "merges": "\u0120hist ological", "count": 349, "type": "remove by frequency"} -{"id": 19798, "token": "\u0120metastatic", "merges": "\u0120metast atic", "count": 349, "type": "remove by frequency"} -{"id": 31681, "token": "\u0120NSString", "merges": "\u0120NS String", "count": 349, "type": "remove by frequency"} -{"id": 35767, "token": "otoxicity", "merges": "otox icity", "count": 349, "type": "remove by frequency"} -{"id": 50240, "token": "\u0120Revised", "merges": "\u0120Rev ised", "count": 349, "type": "remove by frequency"} -{"id": 40002, "token": "ticas", "merges": "tic as", "count": 349, "type": "remove by frequency"} -{"id": 48403, "token": "\u0120Rousseau", "merges": "\u0120R ousseau", "count": 350, "type": "remove by frequency"} -{"id": 36965, "token": "\u0120Diane", "merges": "\u0120D iane", "count": 350, "type": "remove by frequency"} -{"id": 48488, "token": "\u0120JOHN", "merges": "\u0120J OHN", "count": 350, "type": "remove by frequency"} -{"id": 28675, "token": "\u0120Fla", "merges": "\u0120Fl a", "count": 350, "type": "remove by frequency"} -{"id": 20896, "token": "\u0120biomarkers", "merges": "\u0120biomark ers", "count": 351, "type": "remove by frequency"} -{"id": 43018, "token": "\u0120clinician", "merges": "\u0120clin ician", "count": 351, "type": "remove by frequency"} -{"id": 15636, "token": "\u0120Governor", "merges": "\u0120Govern or", "count": 351, "type": "remove by frequency"} -{"id": 20159, "token": "\u0120Orleans", "merges": "\u0120Or leans", "count": 351, "type": "remove by frequency"} -{"id": 32714, "token": "\u0120Toyota", "merges": "\u0120Toy ota", "count": 351, "type": "remove by frequency"} -{"id": 29654, "token": "\u0120Kash", "merges": "\u0120K ash", "count": 351, "type": "remove by frequency"} -{"id": 27913, "token": "\u0120microarray", "merges": "\u0120micro array", "count": 352, "type": "remove by frequency"} -{"id": 49237, "token": "\u0120ganglia", "merges": "\u0120gang lia", "count": 352, "type": "remove by frequency"} -{"id": 47716, "token": "\u0120hemod", "merges": "\u0120he mod", "count": 352, "type": "remove by frequency"} -{"id": 24292, "token": "\u0120\u00ce\u0143", "merges": "\u0120\u00ce \u0143", "count": 352, "type": "remove by frequency"} -{"id": 13655, "token": "\u0120phosphorylation", "merges": "\u0120phosphory lation", "count": 353, "type": "remove by frequency"} -{"id": 19056, "token": "Plaintiff", "merges": "Pl aintiff", "count": 353, "type": "remove by frequency"} -{"id": 39570, "token": "\u0120grafts", "merges": "\u0120graft s", "count": 353, "type": "remove by frequency"} -{"id": 30168, "token": "\u0120Buddh", "merges": "\u0120Budd h", "count": 353, "type": "remove by frequency"} -{"id": 49057, "token": "\u0120Trin", "merges": "\u0120Tr in", "count": 353, "type": "remove by frequency"} -{"id": 28649, "token": "\u00ce\u00bf\u00ce\u00bd", "merges": "\u00ce\u00bf \u00ce\u00bd", "count": 353, "type": "remove by frequency"} -{"id": 24029, "token": "\u00c3\u0131", "merges": "\u00c3 \u0131", "count": 353, "type": "remove by frequency"} -{"id": 43121, "token": "\u0120analogs", "merges": "\u0120analog s", "count": 354, "type": "remove by frequency"} -{"id": 43711, "token": "olateral", "merges": "ol ateral", "count": 354, "type": "remove by frequency"} -{"id": 27999, "token": "\u0120homot", "merges": "\u0120hom ot", "count": 354, "type": "remove by frequency"} -{"id": 43093, "token": "\u0120Nile", "merges": "\u0120N ile", "count": 354, "type": "remove by frequency"} -{"id": 34758, "token": "[{\\", "merges": "[ {\\", "count": 354, "type": "remove by frequency"} -{"id": 46380, "token": "\u0120despu\u00c3\u00a9s", "merges": "\u0120des pu\u00c3\u00a9s", "count": 355, "type": "remove by frequency"} -{"id": 42953, "token": "\u0120Edmund", "merges": "\u0120Ed mund", "count": 355, "type": "remove by frequency"} -{"id": 23239, "token": "\u0120Jake", "merges": "\u0120J ake", "count": 355, "type": "remove by frequency"} -{"id": 39402, "token": "k\u00c3\u00a4", "merges": "k \u00c3\u00a4", "count": 355, "type": "remove by frequency"} -{"id": 45521, "token": "\u0120grandes", "merges": "\u0120grand es", "count": 356, "type": "remove by frequency"} -{"id": 20498, "token": "\u0120Arabia", "merges": "\u0120Arab ia", "count": 356, "type": "remove by frequency"} -{"id": 45561, "token": "\u0120Aires", "merges": "\u0120A ires", "count": 356, "type": "remove by frequency"} -{"id": 52560, "token": "\u00e6\u00b4\u00a9", "merges": "\u00e6\u00b4 \u00a9", "count": 356, "type": "remove by frequency"} -{"id": 32576, "token": "\u0120Institutional", "merges": "\u0120In stitutional", "count": 357, "type": "remove by frequency"} -{"id": 41155, "token": "\u0120eukaryotic", "merges": "\u0120eukary otic", "count": 357, "type": "remove by frequency"} -{"id": 43870, "token": "\u0120neuropsych", "merges": "\u0120neuro psych", "count": 357, "type": "remove by frequency"} -{"id": 39776, "token": "\u0120Triton", "merges": "\u0120T riton", "count": 357, "type": "remove by frequency"} -{"id": 35665, "token": "\u0120Boris", "merges": "\u0120Bor is", "count": 357, "type": "remove by frequency"} -{"id": 46174, "token": "\u0120Jagu", "merges": "\u0120J agu", "count": 357, "type": "remove by frequency"} -{"id": 44534, "token": "\u0120Mug", "merges": "\u0120M ug", "count": 357, "type": "remove by frequency"} -{"id": 6922, "token": "}}_{", "merges": "}} _{", "count": 357, "type": "remove by frequency"} -{"id": 22457, "token": "downarrow", "merges": "down arrow", "count": 358, "type": "remove by frequency"} -{"id": 27045, "token": "\u0120Syscall", "merges": "\u0120S yscall", "count": 358, "type": "remove by frequency"} -{"id": 30593, "token": "\u0120amyloid", "merges": "\u0120amyl oid", "count": 358, "type": "remove by frequency"} -{"id": 22200, "token": "\u0120Charl", "merges": "\u0120Char l", "count": 358, "type": "remove by frequency"} -{"id": 22635, "token": "\\**", "merges": "\\ **", "count": 358, "type": "remove by frequency"} -{"id": 17732, "token": "\u0120\u00c3\u0127", "merges": "\u0120\u00c3 \u0127", "count": 358, "type": "remove by frequency"} -{"id": 29605, "token": "\u0120reactivity", "merges": "\u0120react ivity", "count": 359, "type": "remove by frequency"} -{"id": 25270, "token": "\u0120Finland", "merges": "\u0120Fin land", "count": 359, "type": "remove by frequency"} -{"id": 37225, "token": "\u0120Wilcox", "merges": "\u0120Wil cox", "count": 359, "type": "remove by frequency"} -{"id": 44807, "token": "\u0120questa", "merges": "\u0120quest a", "count": 359, "type": "remove by frequency"} -{"id": 42421, "token": "\u0120Tribe", "merges": "\u0120Trib e", "count": 359, "type": "remove by frequency"} -{"id": 46980, "token": "\u0120Rita", "merges": "\u0120R ita", "count": 359, "type": "remove by frequency"} -{"id": 48801, "token": "\u0120Nina", "merges": "\u0120N ina", "count": 359, "type": "remove by frequency"} -{"id": 29704, "token": "\u00ce\u00bb\u00ce\u00bb", "merges": "\u00ce\u00bb \u00ce\u00bb", "count": 359, "type": "remove by frequency"} -{"id": 28651, "token": "\u0120Iceland", "merges": "\u0120I celand", "count": 360, "type": "remove by frequency"} -{"id": 49892, "token": "\u0120Coca", "merges": "\u0120C oca", "count": 360, "type": "remove by frequency"} -{"id": 35113, "token": "\u00c4\u00be", "merges": "\u00c4 \u00be", "count": 360, "type": "remove by frequency"} -{"id": 14289, "token": "\u0120Philadelphia", "merges": "\u0120Phil adelphia", "count": 361, "type": "remove by frequency"} -{"id": 42392, "token": "\u0120flexion", "merges": "\u0120flex ion", "count": 361, "type": "remove by frequency"} -{"id": 32903, "token": "\u0120heeft", "merges": "\u0120he eft", "count": 361, "type": "remove by frequency"} -{"id": 34012, "token": "\u0120Midd", "merges": "\u0120M idd", "count": 361, "type": "remove by frequency"} -{"id": 46271, "token": "\u0120antipsych", "merges": "\u0120ant ipsych", "count": 362, "type": "remove by frequency"} -{"id": 22300, "token": "\u0120Likewise", "merges": "\u0120Like wise", "count": 362, "type": "remove by frequency"} -{"id": 46242, "token": "\u0120Sisters", "merges": "\u0120S isters", "count": 362, "type": "remove by frequency"} -{"id": 34327, "token": "\u0120Maggie", "merges": "\u0120Mag gie", "count": 362, "type": "remove by frequency"} -{"id": 21665, "token": "\u0120Chel", "merges": "\u0120C hel", "count": 362, "type": "remove by frequency"} -{"id": 42601, "token": "\u0120Riemannian", "merges": "\u0120Riemann ian", "count": 363, "type": "remove by frequency"} -{"id": 14732, "token": "\u0120COURT", "merges": "\u0120COU RT", "count": 363, "type": "remove by frequency"} -{"id": 47039, "token": "\u0120bara", "merges": "\u0120bar a", "count": 363, "type": "remove by frequency"} -{"id": 40980, "token": "r\u00c3\u0143a", "merges": "r \u00c3\u0143a", "count": 363, "type": "remove by frequency"} -{"id": 15431, "token": "\u0120macrophages", "merges": "\u0120macroph ages", "count": 364, "type": "remove by frequency"} -{"id": 18908, "token": "\u0120Switzerland", "merges": "\u0120Sw itzerland", "count": 364, "type": "remove by frequency"} -{"id": 31585, "token": "\u0120creatinine", "merges": "\u0120creat inine", "count": 364, "type": "remove by frequency"} -{"id": 34050, "token": "\u0120Diabetes", "merges": "\u0120Di abetes", "count": 364, "type": "remove by frequency"} -{"id": 46285, "token": "\u0120Eating", "merges": "\u0120E ating", "count": 364, "type": "remove by frequency"} -{"id": 38783, "token": "\u0120Alexandria", "merges": "\u0120Alexand ria", "count": 365, "type": "remove by frequency"} -{"id": 48860, "token": "\u0120mandibular", "merges": "\u0120mand ibular", "count": 365, "type": "remove by frequency"} -{"id": 32593, "token": "\u0120absorbance", "merges": "\u0120absorb ance", "count": 365, "type": "remove by frequency"} -{"id": 48785, "token": "\u0120glycogen", "merges": "\u0120glyc ogen", "count": 365, "type": "remove by frequency"} -{"id": 19688, "token": "\u0120Swedish", "merges": "\u0120Swed ish", "count": 365, "type": "remove by frequency"} -{"id": 49769, "token": "oblastic", "merges": "obl astic", "count": 365, "type": "remove by frequency"} -{"id": 38376, "token": "\u0120Alcohol", "merges": "\u0120Al cohol", "count": 365, "type": "remove by frequency"} -{"id": 45004, "token": "\u0120Benson", "merges": "\u0120B enson", "count": 365, "type": "remove by frequency"} -{"id": 34884, "token": "\u0120Chad", "merges": "\u0120Ch ad", "count": 365, "type": "remove by frequency"} -{"id": 53268, "token": "\u00e7\u00b2\u00a7", "merges": "\u00e7\u00b2 \u00a7", "count": 365, "type": "remove by frequency"} -{"id": 50020, "token": "\u00cf\u0137", "merges": "\u00cf \u0137", "count": 365, "type": "remove by frequency"} -{"id": 32241, "token": "\u0120Coalition", "merges": "\u0120Coal ition", "count": 366, "type": "remove by frequency"} -{"id": 46573, "token": "lipidemia", "merges": "lip idemia", "count": 366, "type": "remove by frequency"} -{"id": 41913, "token": "\u0120bosons", "merges": "\u0120bos ons", "count": 366, "type": "remove by frequency"} -{"id": 48215, "token": "\u0120Yorker", "merges": "\u0120York er", "count": 366, "type": "remove by frequency"} -{"id": 40152, "token": "\u0120Tracy", "merges": "\u0120Tr acy", "count": 366, "type": "remove by frequency"} -{"id": 30248, "token": "\u0120adenocarcinoma", "merges": "\u0120aden ocarcinoma", "count": 367, "type": "remove by frequency"} -{"id": 43230, "token": "\u0120Hawkins", "merges": "\u0120Haw kins", "count": 367, "type": "remove by frequency"} -{"id": 21647, "token": "\u0120Czech", "merges": "\u0120C zech", "count": 367, "type": "remove by frequency"} -{"id": 24427, "token": "\u00c8\u013bi", "merges": "\u00c8\u013b i", "count": 367, "type": "remove by frequency"} -{"id": 54606, "token": "\u00ef\u00bc\u0129", "merges": "\u00ef\u00bc \u0129", "count": 367, "type": "remove by frequency"} -{"id": 35371, "token": "\u0120epidermal", "merges": "\u0120epid ermal", "count": 368, "type": "remove by frequency"} -{"id": 45762, "token": "\u0120Writers", "merges": "\u0120Writ ers", "count": 368, "type": "remove by frequency"} -{"id": 44928, "token": "\u0120aspart", "merges": "\u0120as part", "count": 368, "type": "remove by frequency"} -{"id": 49849, "token": "\u0120Olson", "merges": "\u0120Ol son", "count": 368, "type": "remove by frequency"} -{"id": 28434, "token": "\u00ce\u00b5\u00cf\u0124", "merges": "\u00ce\u00b5 \u00cf\u0124", "count": 368, "type": "remove by frequency"} -{"id": 27983, "token": "\u0120Detective", "merges": "\u0120Detect ive", "count": 369, "type": "remove by frequency"} -{"id": 40424, "token": "\u0120Theodore", "merges": "\u0120Theod ore", "count": 369, "type": "remove by frequency"} -{"id": 47802, "token": "aucoup", "merges": "auc oup", "count": 369, "type": "remove by frequency"} -{"id": 38633, "token": "\u0120Enforcement", "merges": "\u0120En forcement", "count": 370, "type": "remove by frequency"} -{"id": 44124, "token": "\u0120Preston", "merges": "\u0120Prest on", "count": 370, "type": "remove by frequency"} -{"id": 16166, "token": "\u0120$('", "merges": "\u0120$ ('", "count": 370, "type": "remove by frequency"} -{"id": 34628, "token": "\u0120Biotechnology", "merges": "\u0120Bi otechnology", "count": 371, "type": "remove by frequency"} -{"id": 24258, "token": "\u0120Considering", "merges": "\u0120Consider ing", "count": 371, "type": "remove by frequency"} -{"id": 47271, "token": "Regardless", "merges": "Reg ardless", "count": 371, "type": "remove by frequency"} -{"id": 48467, "token": "\u0120Sinclair", "merges": "\u0120Sin clair", "count": 371, "type": "remove by frequency"} -{"id": 32905, "token": "\u0120Timothy", "merges": "\u0120Tim othy", "count": 371, "type": "remove by frequency"} -{"id": 29045, "token": "st\u00c3\u00a4", "merges": "st \u00c3\u00a4", "count": 371, "type": "remove by frequency"} -{"id": 30818, "token": "\u0120epigenetic", "merges": "\u0120epigen etic", "count": 372, "type": "remove by frequency"} -{"id": 43426, "token": "\u0120Nichols", "merges": "\u0120Nich ols", "count": 372, "type": "remove by frequency"} -{"id": 41940, "token": "\u0120Unter", "merges": "\u0120Un ter", "count": 372, "type": "remove by frequency"} -{"id": 16119, "token": "\u0120ovar", "merges": "\u0120o var", "count": 372, "type": "remove by frequency"} -{"id": 49142, "token": "\u0120Fuck", "merges": "\u0120F uck", "count": 372, "type": "remove by frequency"} -{"id": 42908, "token": "\u0120\u00f0\u0141\u013b\u0124", "merges": "\u0120\u00f0\u0141 \u013b\u0124", "count": 372, "type": "remove by frequency"} -{"id": 22783, "token": "\u0120Denmark", "merges": "\u0120Den mark", "count": 373, "type": "remove by frequency"} -{"id": 35944, "token": "\u0120Andrea", "merges": "\u0120And rea", "count": 373, "type": "remove by frequency"} -{"id": 42266, "token": "\u0120Edwin", "merges": "\u0120Ed win", "count": 373, "type": "remove by frequency"} -{"id": 44533, "token": "\u0120Ces", "merges": "\u0120C es", "count": 373, "type": "remove by frequency"} -{"id": 54603, "token": "\u00ef\u00bc\u0126", "merges": "\u00ef\u00bc \u0126", "count": 373, "type": "remove by frequency"} -{"id": 35984, "token": "\u0120mesenchymal", "merges": "\u0120mes enchymal", "count": 374, "type": "remove by frequency"} -{"id": 49834, "token": "\u0120stressors", "merges": "\u0120stress ors", "count": 374, "type": "remove by frequency"} -{"id": 21335, "token": "\u0120phosphat", "merges": "\u0120phosph at", "count": 374, "type": "remove by frequency"} -{"id": 34777, "token": "\u0120Warriors", "merges": "\u0120War riors", "count": 374, "type": "remove by frequency"} -{"id": 46901, "token": "\u0120Rafael", "merges": "\u0120Raf ael", "count": 374, "type": "remove by frequency"} -{"id": 34564, "token": "oblasts", "merges": "oblast s", "count": 374, "type": "remove by frequency"} -{"id": 33149, "token": "\u0120Flynn", "merges": "\u0120Fly nn", "count": 374, "type": "remove by frequency"} -{"id": 27705, "token": "\u0120Sox", "merges": "\u0120S ox", "count": 374, "type": "remove by frequency"} -{"id": 37795, "token": "\u0120CKD", "merges": "\u0120CK D", "count": 374, "type": "remove by frequency"} -{"id": 3822, "token": ")$.", "merges": ")$ .", "count": 374, "type": "remove by frequency"} -{"id": 48648, "token": "\u0120Janeiro", "merges": "\u0120Jane iro", "count": 375, "type": "remove by frequency"} -{"id": 38254, "token": "\u0120Apost", "merges": "\u0120Ap ost", "count": 375, "type": "remove by frequency"} -{"id": 49550, "token": "\u0120\\\\[", "merges": "\u0120\\ \\[", "count": 375, "type": "remove by frequency"} -{"id": 40998, "token": "\u0120Clayton", "merges": "\u0120Clay ton", "count": 376, "type": "remove by frequency"} -{"id": 46937, "token": "\u0120Teresa", "merges": "\u0120Te resa", "count": 376, "type": "remove by frequency"} -{"id": 27016, "token": "\u0120Buddha", "merges": "\u0120Budd ha", "count": 376, "type": "remove by frequency"} -{"id": 47298, "token": "\u0120Hoover", "merges": "\u0120Ho over", "count": 376, "type": "remove by frequency"} -{"id": 25242, "token": "\u00c3\u00bdch", "merges": "\u00c3\u00bd ch", "count": 376, "type": "remove by frequency"} -{"id": 49404, "token": "\u0120pathologies", "merges": "\u0120path ologies", "count": 377, "type": "remove by frequency"} -{"id": 17746, "token": "\u0120Recently", "merges": "\u0120Rec ently", "count": 377, "type": "remove by frequency"} -{"id": 31026, "token": "\u0120Glenn", "merges": "\u0120Gl enn", "count": 377, "type": "remove by frequency"} -{"id": 10128, "token": "\u0120Ohio", "merges": "\u0120Oh io", "count": 377, "type": "remove by frequency"} -{"id": 29778, "token": "\u0120Joan", "merges": "\u0120Jo an", "count": 377, "type": "remove by frequency"} -{"id": 31463, "token": "\u0120Ala", "merges": "\u0120Al a", "count": 377, "type": "remove by frequency"} -{"id": 24868, "token": "\u0120\u00c3\u00a9s", "merges": "\u0120\u00c3\u00a9 s", "count": 377, "type": "remove by frequency"} -{"id": 29636, "token": "\u0120Ug", "merges": "\u0120U g", "count": 377, "type": "remove by frequency"} -{"id": 42511, "token": "_.\"", "merges": "_ .\"", "count": 377, "type": "remove by frequency"} -{"id": 26308, "token": "\u0120preoperative", "merges": "\u0120pre operative", "count": 378, "type": "remove by frequency"} -{"id": 35566, "token": "\u0120viel", "merges": "\u0120v iel", "count": 378, "type": "remove by frequency"} -{"id": 38777, "token": "\u0120Fas", "merges": "\u0120F as", "count": 378, "type": "remove by frequency"} -{"id": 48324, "token": "\u0120AdS", "merges": "\u0120Ad S", "count": 378, "type": "remove by frequency"} -{"id": 47153, "token": "\u0120triglycerides", "merges": "\u0120triglycer ides", "count": 379, "type": "remove by frequency"} -{"id": 23004, "token": "\u0120Championships", "merges": "\u0120Championship s", "count": 379, "type": "remove by frequency"} -{"id": 30268, "token": "\u0120finitely", "merges": "\u0120fin itely", "count": 379, "type": "remove by frequency"} -{"id": 42890, "token": "\u0120Dawson", "merges": "\u0120Daw son", "count": 379, "type": "remove by frequency"} -{"id": 32073, "token": "\u0120hepar", "merges": "\u0120he par", "count": 379, "type": "remove by frequency"} -{"id": 27283, "token": "\u0120Jamie", "merges": "\u0120Jam ie", "count": 379, "type": "remove by frequency"} -{"id": 38923, "token": "\u0120Vaugh", "merges": "\u0120V augh", "count": 379, "type": "remove by frequency"} -{"id": 29073, "token": "\u0120phag", "merges": "\u0120ph ag", "count": 379, "type": "remove by frequency"} -{"id": 16943, "token": "[]{", "merges": "[ ]{", "count": 379, "type": "remove by frequency"} -{"id": 27988, "token": ">\";", "merges": "> \";", "count": 379, "type": "remove by frequency"} -{"id": 28450, "token": "\u0120translocation", "merges": "\u0120transl ocation", "count": 380, "type": "remove by frequency"} -{"id": 34358, "token": "\u0120Ambassador", "merges": "\u0120Amb assador", "count": 380, "type": "remove by frequency"} -{"id": 47185, "token": "\u0120Dickinson", "merges": "\u0120Dick inson", "count": 380, "type": "remove by frequency"} -{"id": 32042, "token": "\u0120Pierce", "merges": "\u0120Pier ce", "count": 380, "type": "remove by frequency"} -{"id": 46114, "token": "\u0120Examin", "merges": "\u0120Ex amin", "count": 380, "type": "remove by frequency"} -{"id": 41527, "token": "\u0120Giul", "merges": "\u0120Gi ul", "count": 380, "type": "remove by frequency"} -{"id": 36263, "token": "\u0120declaratory", "merges": "\u0120declar atory", "count": 381, "type": "remove by frequency"} -{"id": 45663, "token": "\u0120Borough", "merges": "\u0120Bor ough", "count": 381, "type": "remove by frequency"} -{"id": 49249, "token": "\u0120BGCOLOR", "merges": "\u0120BGC OLOR", "count": 381, "type": "remove by frequency"} -{"id": 33208, "token": "\u0120Zach", "merges": "\u0120Z ach", "count": 381, "type": "remove by frequency"} -{"id": 33370, "token": "\u0120BIA", "merges": "\u0120B IA", "count": 381, "type": "remove by frequency"} -{"id": 33911, "token": "\u0120Continental", "merges": "\u0120Contin ental", "count": 382, "type": "remove by frequency"} -{"id": 30535, "token": "\u0120plasticity", "merges": "\u0120plastic ity", "count": 382, "type": "remove by frequency"} -{"id": 48395, "token": "\u0120arthro", "merges": "\u0120arth ro", "count": 382, "type": "remove by frequency"} -{"id": 41942, "token": "\u0120Marin", "merges": "\u0120Mar in", "count": 382, "type": "remove by frequency"} -{"id": 34105, "token": "\u0120Bears", "merges": "\u0120B ears", "count": 382, "type": "remove by frequency"} -{"id": 32794, "token": "\u0120Noah", "merges": "\u0120No ah", "count": 382, "type": "remove by frequency"} -{"id": 32698, "token": "\u0120Troy", "merges": "\u0120T roy", "count": 382, "type": "remove by frequency"} -{"id": 31635, "token": "\u0120Tyr", "merges": "\u0120T yr", "count": 382, "type": "remove by frequency"} -{"id": 15558, "token": "\u0120Wisconsin", "merges": "\u0120Wis consin", "count": 383, "type": "remove by frequency"} -{"id": 42470, "token": "\u0120citrate", "merges": "\u0120cit rate", "count": 383, "type": "remove by frequency"} -{"id": 42627, "token": "\u00e3\u0123\u0137\u00e3\u0124\u0135", "merges": "\u00e3\u0123\u0137 \u00e3\u0124\u0135", "count": 383, "type": "remove by frequency"} -{"id": 44288, "token": "\u0120CDK", "merges": "\u0120CD K", "count": 383, "type": "remove by frequency"} -{"id": 47113, "token": "\u0120causative", "merges": "\u0120caus ative", "count": 384, "type": "remove by frequency"} -{"id": 49075, "token": "\u0120Eleven", "merges": "\u0120Ele ven", "count": 384, "type": "remove by frequency"} -{"id": 31316, "token": "\u0120Ellen", "merges": "\u0120Ell en", "count": 384, "type": "remove by frequency"} -{"id": 34701, "token": "\u0120Mum", "merges": "\u0120M um", "count": 384, "type": "remove by frequency"} -{"id": 42812, "token": "\u0120Und", "merges": "\u0120Un d", "count": 384, "type": "remove by frequency"} -{"id": 26147, "token": "\u00c2\u00bb,", "merges": "\u00c2\u00bb ,", "count": 384, "type": "remove by frequency"} -{"id": 18978, "token": "Meanwhile", "merges": "Mean while", "count": 385, "type": "remove by frequency"} -{"id": 33602, "token": "\u0120Tribune", "merges": "\u0120Trib une", "count": 385, "type": "remove by frequency"} -{"id": 13281, "token": "\u0120Islamic", "merges": "\u0120Islam ic", "count": 385, "type": "remove by frequency"} -{"id": 49826, "token": "\u0120fuckin", "merges": "\u0120fuck in", "count": 385, "type": "remove by frequency"} -{"id": 38295, "token": "\u0120Torres", "merges": "\u0120Tor res", "count": 385, "type": "remove by frequency"} -{"id": 37779, "token": "\u0120Libert", "merges": "\u0120Lib ert", "count": 385, "type": "remove by frequency"} -{"id": 44035, "token": "\u0120Huss", "merges": "\u0120H uss", "count": 385, "type": "remove by frequency"} -{"id": 30243, "token": "\u0120CVD", "merges": "\u0120C VD", "count": 385, "type": "remove by frequency"} -{"id": 12886, "token": "^{-\\", "merges": "^{ -\\", "count": 385, "type": "remove by frequency"} -{"id": 47341, "token": "a\u00c3\u00b1a", "merges": "a \u00c3\u00b1a", "count": 385, "type": "remove by frequency"} -{"id": 42252, "token": "\u00d7\u013f", "merges": "\u00d7 \u013f", "count": 385, "type": "remove by frequency"} -{"id": 36600, "token": "\u0120Directors", "merges": "\u0120Direct ors", "count": 386, "type": "remove by frequency"} -{"id": 45226, "token": "\u0120Weaver", "merges": "\u0120We aver", "count": 386, "type": "remove by frequency"} -{"id": 27145, "token": "\u0120Tommy", "merges": "\u0120Tom my", "count": 386, "type": "remove by frequency"} -{"id": 43644, "token": "srep", "merges": "s rep", "count": 386, "type": "remove by frequency"} -{"id": 40942, "token": "Apparently", "merges": "App arently", "count": 387, "type": "remove by frequency"} -{"id": 40067, "token": "\u0120toujours", "merges": "\u0120tou jours", "count": 387, "type": "remove by frequency"} -{"id": 9937, "token": "\u0120Despite", "merges": "\u0120Des pite", "count": 387, "type": "remove by frequency"} -{"id": 36126, "token": "\u0120Sanchez", "merges": "\u0120S anchez", "count": 387, "type": "remove by frequency"} -{"id": 43519, "token": "\u0120Marina", "merges": "\u0120Mar ina", "count": 387, "type": "remove by frequency"} -{"id": 38502, "token": "\u0120Marion", "merges": "\u0120Mar ion", "count": 387, "type": "remove by frequency"} -{"id": 49441, "token": "\u0120diced", "merges": "\u0120d iced", "count": 387, "type": "remove by frequency"} -{"id": 42808, "token": "\u0120Millenn", "merges": "\u0120Mill enn", "count": 388, "type": "remove by frequency"} -{"id": 19694, "token": "\u0120Veter", "merges": "\u0120V eter", "count": 388, "type": "remove by frequency"} -{"id": 41910, "token": "\u0120Isle", "merges": "\u0120Is le", "count": 388, "type": "remove by frequency"} -{"id": 19073, "token": "\u0120\u00ce\u00bd\u00ce\u00b1", "merges": "\u0120\u00ce\u00bd \u00ce\u00b1", "count": 388, "type": "remove by frequency"} -{"id": 46882, "token": "\u0120Kamp", "merges": "\u0120K amp", "count": 388, "type": "remove by frequency"} -{"id": 14070, "token": "\u0120Massachusetts", "merges": "\u0120Mass achusetts", "count": 389, "type": "remove by frequency"} -{"id": 33077, "token": "\u0120hypothal", "merges": "\u0120hyp othal", "count": 389, "type": "remove by frequency"} -{"id": 47870, "token": "\u0120Sadly", "merges": "\u0120Sad ly", "count": 389, "type": "remove by frequency"} -{"id": 32230, "token": "\u0120HPLC", "merges": "\u0120H PLC", "count": 389, "type": "remove by frequency"} -{"id": 33873, "token": "}(-", "merges": "} (-", "count": 389, "type": "remove by frequency"} -{"id": 38261, "token": "\u0120circadian", "merges": "\u0120circ adian", "count": 390, "type": "remove by frequency"} -{"id": 46696, "token": "\u0120Hospit", "merges": "\u0120H ospit", "count": 390, "type": "remove by frequency"} -{"id": 41502, "token": "\u0120rinsed", "merges": "\u0120rins ed", "count": 390, "type": "remove by frequency"} -{"id": 46484, "token": "\u0120Prol", "merges": "\u0120P rol", "count": 390, "type": "remove by frequency"} -{"id": 49528, "token": "\u0120prophylactic", "merges": "\u0120prophyl actic", "count": 391, "type": "remove by frequency"} -{"id": 36329, "token": "\u0120lymphocyte", "merges": "\u0120lymph ocyte", "count": 391, "type": "remove by frequency"} -{"id": 50190, "token": "\u0120Lorenzo", "merges": "\u0120Loren zo", "count": 391, "type": "remove by frequency"} -{"id": 26888, "token": "\u0120Brady", "merges": "\u0120Br ady", "count": 391, "type": "remove by frequency"} -{"id": 45401, "token": "texttt", "merges": "text tt", "count": 391, "type": "remove by frequency"} -{"id": 35607, "token": "\u0120Ranch", "merges": "\u0120R anch", "count": 391, "type": "remove by frequency"} -{"id": 41918, "token": "\u0120Krish", "merges": "\u0120Kr ish", "count": 391, "type": "remove by frequency"} -{"id": 34213, "token": "\u0120Ned", "merges": "\u0120N ed", "count": 391, "type": "remove by frequency"} -{"id": 49503, "token": "\u0120psychotic", "merges": "\u0120psych otic", "count": 392, "type": "remove by frequency"} -{"id": 47164, "token": "\u0120handgun", "merges": "\u0120hand gun", "count": 392, "type": "remove by frequency"} -{"id": 44152, "token": "\u0120Ferrari", "merges": "\u0120Ferr ari", "count": 392, "type": "remove by frequency"} -{"id": 22785, "token": "\u0120Belgium", "merges": "\u0120Bel gium", "count": 392, "type": "remove by frequency"} -{"id": 16496, "token": "\u0120Moscow", "merges": "\u0120M oscow", "count": 392, "type": "remove by frequency"} -{"id": 41167, "token": "\u0120elucidated", "merges": "\u0120elucid ated", "count": 393, "type": "remove by frequency"} -{"id": 22998, "token": "\u0120CONDITIONS", "merges": "\u0120CONDITION S", "count": 393, "type": "remove by frequency"} -{"id": 38005, "token": "\u0120Marian", "merges": "\u0120Mar ian", "count": 393, "type": "remove by frequency"} -{"id": 31340, "token": "\u0120Julie", "merges": "\u0120Jul ie", "count": 393, "type": "remove by frequency"} -{"id": 38713, "token": "\u0120Laur", "merges": "\u0120La ur", "count": 393, "type": "remove by frequency"} -{"id": 20629, "token": "\u0120[$\\", "merges": "\u0120[ $\\", "count": 393, "type": "remove by frequency"} -{"id": 37416, "token": "\u0120hypertrophy", "merges": "\u0120hypert rophy", "count": 394, "type": "remove by frequency"} -{"id": 47632, "token": "---|---|---", "merges": "---| ---|---", "count": 394, "type": "remove by frequency"} -{"id": 22807, "token": "Conclusions", "merges": "Con clusions", "count": 394, "type": "remove by frequency"} -{"id": 45269, "token": "\u0120ectopic", "merges": "\u0120ect opic", "count": 394, "type": "remove by frequency"} -{"id": 32398, "token": "\u0120Daddy", "merges": "\u0120D addy", "count": 394, "type": "remove by frequency"} -{"id": 45778, "token": "\u0120Brom", "merges": "\u0120B rom", "count": 394, "type": "remove by frequency"} -{"id": 50609, "token": "\u00e4\u00bf\u00ac", "merges": "\u00e4\u00bf \u00ac", "count": 394, "type": "remove by frequency"} -{"id": 47552, "token": "++]", "merges": "++ ]", "count": 394, "type": "remove by frequency"} -{"id": 33592, "token": "\u0120holomorphic", "merges": "\u0120hol omorphic", "count": 395, "type": "remove by frequency"} -{"id": 48612, "token": "\u0120Photography", "merges": "\u0120Phot ography", "count": 395, "type": "remove by frequency"} -{"id": 37534, "token": "\u0120obstructive", "merges": "\u0120obstruct ive", "count": 395, "type": "remove by frequency"} -{"id": 44811, "token": "\u0120cerebellum", "merges": "\u0120cere bellum", "count": 395, "type": "remove by frequency"} -{"id": 27235, "token": "\u0120Buddhist", "merges": "\u0120Budd hist", "count": 395, "type": "remove by frequency"} -{"id": 16795, "token": "\u0120Poland", "merges": "\u0120Pol and", "count": 395, "type": "remove by frequency"} -{"id": 41518, "token": "\u0120quanto", "merges": "\u0120quant o", "count": 395, "type": "remove by frequency"} -{"id": 46751, "token": "\u0120\u00d8\u00a7\u00d9\u0126\u00d8\u00aa", "merges": "\u0120\u00d8\u00a7\u00d9\u0126 \u00d8\u00aa", "count": 395, "type": "remove by frequency"} -{"id": 20272, "token": "\u0120HCC", "merges": "\u0120H CC", "count": 395, "type": "remove by frequency"} -{"id": 45713, "token": "\u0120SBP", "merges": "\u0120S BP", "count": 395, "type": "remove by frequency"} -{"id": 45979, "token": "\u0120Astronomy", "merges": "\u0120Astr onomy", "count": 396, "type": "remove by frequency"} -{"id": 47262, "token": "\u0120Haskell", "merges": "\u0120H askell", "count": 396, "type": "remove by frequency"} -{"id": 43344, "token": "\u0120s\u00c3\u00b3lo", "merges": "\u0120s\u00c3\u00b3 lo", "count": 396, "type": "remove by frequency"} -{"id": 25681, "token": "\u0120Rico", "merges": "\u0120R ico", "count": 396, "type": "remove by frequency"} -{"id": 46152, "token": "\u0120AFM", "merges": "\u0120AF M", "count": 396, "type": "remove by frequency"} -{"id": 14548, "token": ")_{", "merges": ") _{", "count": 396, "type": "remove by frequency"} -{"id": 36800, "token": "\u0120Gonzalez", "merges": "\u0120Gonz alez", "count": 397, "type": "remove by frequency"} -{"id": 48080, "token": "\u0120versch", "merges": "\u0120vers ch", "count": 397, "type": "remove by frequency"} -{"id": 36026, "token": "\u0120Isn", "merges": "\u0120Is n", "count": 397, "type": "remove by frequency"} -{"id": 49572, "token": "\u00c5\u00b1", "merges": "\u00c5 \u00b1", "count": 397, "type": "remove by frequency"} -{"id": 40788, "token": "\u0120Individuals", "merges": "\u0120Individual s", "count": 398, "type": "remove by frequency"} -{"id": 45560, "token": "\u0120dissected", "merges": "\u0120disse cted", "count": 398, "type": "remove by frequency"} -{"id": 28566, "token": "\u0120cardiomy", "merges": "\u0120cardi omy", "count": 398, "type": "remove by frequency"} -{"id": 18003, "token": "\u0120fibrobl", "merges": "\u0120fib robl", "count": 398, "type": "remove by frequency"} -{"id": 37117, "token": "\u0120Chrys", "merges": "\u0120Ch rys", "count": 398, "type": "remove by frequency"} -{"id": 28568, "token": "\u0120Clerk", "merges": "\u0120Cl erk", "count": 398, "type": "remove by frequency"} -{"id": 19950, "token": "\u0120Delhi", "merges": "\u0120Del hi", "count": 398, "type": "remove by frequency"} -{"id": 49730, "token": "\u0120XIII", "merges": "\u0120X III", "count": 398, "type": "remove by frequency"} -{"id": 27580, "token": "\u00ce\u00b1\u00ce\u00bd", "merges": "\u00ce\u00b1 \u00ce\u00bd", "count": 398, "type": "remove by frequency"} -{"id": 47286, "token": "\u0120PFS", "merges": "\u0120P FS", "count": 398, "type": "remove by frequency"} -{"id": 36519, "token": "\u0120oxidase", "merges": "\u0120oxid ase", "count": 399, "type": "remove by frequency"} -{"id": 38946, "token": "\u0120Forbes", "merges": "\u0120For bes", "count": 399, "type": "remove by frequency"} -{"id": 37509, "token": "it\u00c3\u00a4t", "merges": "it \u00c3\u00a4t", "count": 399, "type": "remove by frequency"} -{"id": 36860, "token": "ADVERTISEMENT", "merges": "AD VERTISEMENT", "count": 400, "type": "remove by frequency"} -{"id": 47821, "token": "\u0120disulfide", "merges": "\u0120dis ulfide", "count": 400, "type": "remove by frequency"} -{"id": 36845, "token": "\u0120Simmons", "merges": "\u0120Sim mons", "count": 401, "type": "remove by frequency"} -{"id": 42650, "token": "\u0120Willis", "merges": "\u0120Will is", "count": 401, "type": "remove by frequency"} -{"id": 36060, "token": "\u0120na\u00c3\u00afve", "merges": "\u0120na \u00c3\u00afve", "count": 401, "type": "remove by frequency"} -{"id": 49453, "token": "yzed", "merges": "y zed", "count": 401, "type": "remove by frequency"} -{"id": 48185, "token": "\u0120\u00ce\u0133", "merges": "\u0120\u00ce \u0133", "count": 401, "type": "remove by frequency"} -{"id": 44093, "token": "r\u00c3\u00a4", "merges": "r \u00c3\u00a4", "count": 401, "type": "remove by frequency"} -{"id": 22322, "token": "unnumbered", "merges": "unn umbered", "count": 402, "type": "remove by frequency"} -{"id": 34516, "token": "\u0120teaspoons", "merges": "\u0120teasp oons", "count": 402, "type": "remove by frequency"} -{"id": 29804, "token": "\u0120Angela", "merges": "\u0120Ang ela", "count": 402, "type": "remove by frequency"} -{"id": 33747, "token": "\u0120Ashley", "merges": "\u0120Ash ley", "count": 402, "type": "remove by frequency"} -{"id": 42635, "token": "\u0120cereb", "merges": "\u0120cere b", "count": 402, "type": "remove by frequency"} -{"id": 43051, "token": "\u0120Homer", "merges": "\u0120H omer", "count": 402, "type": "remove by frequency"} -{"id": 47797, "token": "\u0120Hopf", "merges": "\u0120Hop f", "count": 402, "type": "remove by frequency"} -{"id": 45411, "token": "\u0120Carb", "merges": "\u0120Car b", "count": 402, "type": "remove by frequency"} -{"id": 31300, "token": "\u0120Lys", "merges": "\u0120L ys", "count": 402, "type": "remove by frequency"} -{"id": 39795, "token": "\u0120Fernando", "merges": "\u0120Fern ando", "count": 403, "type": "remove by frequency"} -{"id": 48113, "token": "\u0120consegu", "merges": "\u0120conse gu", "count": 403, "type": "remove by frequency"} -{"id": 22880, "token": "\u0120\u00cf\u0126\u00ce\u00bf\u00cf\u0127", "merges": "\u0120\u00cf\u0126 \u00ce\u00bf\u00cf\u0127", "count": 403, "type": "remove by frequency"} -{"id": 14307, "token": "\u0120Turkey", "merges": "\u0120Tur key", "count": 403, "type": "remove by frequency"} -{"id": 48711, "token": "\u00e3\u0123\u013f\u00e3\u0123\u0128", "merges": "\u00e3\u0123\u013f \u00e3\u0123\u0128", "count": 403, "type": "remove by frequency"} -{"id": 36054, "token": "\u0120diese", "merges": "\u0120dies e", "count": 403, "type": "remove by frequency"} -{"id": 41716, "token": "clerotic", "merges": "cler otic", "count": 404, "type": "remove by frequency"} -{"id": 21740, "token": "\u0120Patri", "merges": "\u0120Pat ri", "count": 404, "type": "remove by frequency"} -{"id": 28439, "token": "vdots", "merges": "v dots", "count": 404, "type": "remove by frequency"} -{"id": 40437, "token": "}}\\\\", "merges": "}}\\ \\", "count": 404, "type": "remove by frequency"} -{"id": 39583, "token": "\u00c3\u0141en", "merges": "\u00c3\u0141 en", "count": 404, "type": "remove by frequency"} -{"id": 50034, "token": "\u00e2\u012d\u00af", "merges": "\u00e2\u012d \u00af", "count": 404, "type": "remove by frequency"} -{"id": 41474, "token": "\u0120Johannes", "merges": "\u0120Johann es", "count": 405, "type": "remove by frequency"} -{"id": 34334, "token": "\u0120Monroe", "merges": "\u0120Mon roe", "count": 405, "type": "remove by frequency"} -{"id": 36538, "token": "\u0120Zel", "merges": "\u0120Z el", "count": 405, "type": "remove by frequency"} -{"id": 37537, "token": "\u0120hematopoietic", "merges": "\u0120hemat opoietic", "count": 406, "type": "remove by frequency"} -{"id": 38552, "token": "\u0120transcriptome", "merges": "\u0120transcript ome", "count": 406, "type": "remove by frequency"} -{"id": 16342, "token": "\u0120Commonwealth", "merges": "\u0120Common wealth", "count": 406, "type": "remove by frequency"} -{"id": 17367, "token": "\u0120Ontario", "merges": "\u0120Ont ario", "count": 406, "type": "remove by frequency"} -{"id": 38103, "token": "\u0120telome", "merges": "\u0120tel ome", "count": 406, "type": "remove by frequency"} -{"id": 26494, "token": "\u0120aureus", "merges": "\u0120a ureus", "count": 406, "type": "remove by frequency"} -{"id": 38048, "token": "\u0120Judy", "merges": "\u0120Jud y", "count": 406, "type": "remove by frequency"} -{"id": 47094, "token": "\u0120Hume", "merges": "\u0120H ume", "count": 406, "type": "remove by frequency"} -{"id": 45201, "token": ")\\*", "merges": ")\\ *", "count": 406, "type": "remove by frequency"} -{"id": 22978, "token": "={\\", "merges": "= {\\", "count": 406, "type": "remove by frequency"} -{"id": 31020, "token": "\u0120europ\u00c3\u00a9", "merges": "\u0120europ \u00c3\u00a9", "count": 407, "type": "remove by frequency"} -{"id": 34084, "token": "\u0120Abbott", "merges": "\u0120Abb ott", "count": 407, "type": "remove by frequency"} -{"id": 30681, "token": "\u00ce\u00b5\u00ce\u00bd", "merges": "\u00ce\u00b5 \u00ce\u00bd", "count": 407, "type": "remove by frequency"} -{"id": 43199, "token": "Suddenly", "merges": "Sud denly", "count": 408, "type": "remove by frequency"} -{"id": 35839, "token": "\u0120preterm", "merges": "\u0120pre term", "count": 408, "type": "remove by frequency"} -{"id": 40703, "token": "\u0120Dismiss", "merges": "\u0120D ismiss", "count": 408, "type": "remove by frequency"} -{"id": 3854, "token": "\u0120Suppose", "merges": "\u0120S uppose", "count": 408, "type": "remove by frequency"} -{"id": 41701, "token": "\u0120Priest", "merges": "\u0120Pri est", "count": 408, "type": "remove by frequency"} -{"id": 48629, "token": "iphatic", "merges": "ip hatic", "count": 408, "type": "remove by frequency"} -{"id": 37383, "token": "\u0120toute", "merges": "\u0120tou te", "count": 408, "type": "remove by frequency"} -{"id": 47990, "token": "\u0120XII", "merges": "\u0120X II", "count": 408, "type": "remove by frequency"} -{"id": 21237, "token": "*^*", "merges": "* ^*", "count": 408, "type": "remove by frequency"} -{"id": 19974, "token": "\u0120Representative", "merges": "\u0120Represent ative", "count": 409, "type": "remove by frequency"} -{"id": 25787, "token": "\u0120autoimmune", "merges": "\u0120auto immune", "count": 409, "type": "remove by frequency"} -{"id": 24014, "token": "\u0120chromatin", "merges": "\u0120chrom atin", "count": 409, "type": "remove by frequency"} -{"id": 33357, "token": "\u0120Adrian", "merges": "\u0120Ad rian", "count": 409, "type": "remove by frequency"} -{"id": 39177, "token": "\u0120minced", "merges": "\u0120min ced", "count": 409, "type": "remove by frequency"} -{"id": 33893, "token": "\u0120maar", "merges": "\u0120ma ar", "count": 409, "type": "remove by frequency"} -{"id": 17896, "token": "bigl", "merges": "big l", "count": 409, "type": "remove by frequency"} -{"id": 21420, "token": "\u0120lymphocytes", "merges": "\u0120lymph ocytes", "count": 410, "type": "remove by frequency"} -{"id": 43225, "token": "\u0120pathologic", "merges": "\u0120path ologic", "count": 410, "type": "remove by frequency"} -{"id": 45950, "token": "\u00e3\u0123\u00a6\u00e3\u0123\u0126\u00e3\u0123\u0141", "merges": "\u00e3\u0123\u00a6\u00e3\u0123\u0126 \u00e3\u0123\u0141", "count": 410, "type": "remove by frequency"} -{"id": 27210, "token": "\u0120blotting", "merges": "\u0120blot ting", "count": 410, "type": "remove by frequency"} -{"id": 19330, "token": "\u0120Counsel", "merges": "\u0120Coun sel", "count": 410, "type": "remove by frequency"} -{"id": 41494, "token": "\u0120GBM", "merges": "\u0120G BM", "count": 410, "type": "remove by frequency"} -{"id": 30916, "token": "'$.", "merges": "' $.", "count": 410, "type": "remove by frequency"} -{"id": 37569, "token": "\u0120Knox", "merges": "\u0120Kn ox", "count": 411, "type": "remove by frequency"} -{"id": 11567, "token": "\u0120WARRANT", "merges": "\u0120W ARRANT", "count": 412, "type": "remove by frequency"} -{"id": 30913, "token": "\u0120Majesty", "merges": "\u0120Maj esty", "count": 412, "type": "remove by frequency"} -{"id": 45174, "token": "\u0120sentido", "merges": "\u0120sent ido", "count": 412, "type": "remove by frequency"} -{"id": 28464, "token": "\u0120sepsis", "merges": "\u0120se psis", "count": 412, "type": "remove by frequency"} -{"id": 43921, "token": "\u0120Wesley", "merges": "\u0120Wes ley", "count": 412, "type": "remove by frequency"} -{"id": 49918, "token": "\u0120Says", "merges": "\u0120S ays", "count": 412, "type": "remove by frequency"} -{"id": 36614, "token": "\u0120Elle", "merges": "\u0120El le", "count": 412, "type": "remove by frequency"} -{"id": 11367, "token": "\u0120\u00c2\u00b6", "merges": "\u0120\u00c2 \u00b6", "count": 412, "type": "remove by frequency"} -{"id": 36741, "token": "\u0120Christie", "merges": "\u0120Christ ie", "count": 413, "type": "remove by frequency"} -{"id": 42430, "token": "\u0120Marcel", "merges": "\u0120Mar cel", "count": 413, "type": "remove by frequency"} -{"id": 37869, "token": "\u0120Perez", "merges": "\u0120Pe rez", "count": 413, "type": "remove by frequency"} -{"id": 46586, "token": "\u0120Gomez", "merges": "\u0120G omez", "count": 413, "type": "remove by frequency"} -{"id": 44446, "token": "\u0120jej", "merges": "\u0120je j", "count": 413, "type": "remove by frequency"} -{"id": 45133, "token": "\u00ce\u00ba\u00ce\u00b5", "merges": "\u00ce\u00ba \u00ce\u00b5", "count": 413, "type": "remove by frequency"} -{"id": 43855, "token": "\u0120Carpenter", "merges": "\u0120Carp enter", "count": 414, "type": "remove by frequency"} -{"id": 37006, "token": "\u0120optically", "merges": "\u0120opt ically", "count": 414, "type": "remove by frequency"} -{"id": 42471, "token": "\u0120Frankfurt", "merges": "\u0120Frank furt", "count": 414, "type": "remove by frequency"} -{"id": 48088, "token": "\u0120Augustus", "merges": "\u0120August us", "count": 414, "type": "remove by frequency"} -{"id": 22085, "token": "\u0120Hawaii", "merges": "\u0120Hawai i", "count": 414, "type": "remove by frequency"} -{"id": 41835, "token": "\u0120Snyder", "merges": "\u0120S nyder", "count": 414, "type": "remove by frequency"} -{"id": 40824, "token": "\u0120Humph", "merges": "\u0120Hum ph", "count": 414, "type": "remove by frequency"} -{"id": 18308, "token": "\u0120Mayor", "merges": "\u0120May or", "count": 414, "type": "remove by frequency"} -{"id": 38804, "token": "\u0120\u00d9\u0125", "merges": "\u0120 \u00d9\u0125", "count": 414, "type": "remove by frequency"} -{"id": 36059, "token": "VERTISEMENT", "merges": "VERTIS EMENT", "count": 415, "type": "remove by frequency"} -{"id": 36595, "token": "plaintiff", "merges": "pl aintiff", "count": 415, "type": "remove by frequency"} -{"id": 45752, "token": "\u0120cranial", "merges": "\u0120cran ial", "count": 415, "type": "remove by frequency"} -{"id": 39505, "token": "\u0120Ahmad", "merges": "\u0120A hmad", "count": 415, "type": "remove by frequency"} -{"id": 36527, "token": "\u0120Coron", "merges": "\u0120Cor on", "count": 415, "type": "remove by frequency"} -{"id": 47296, "token": "NavBar", "merges": "Nav Bar", "count": 415, "type": "remove by frequency"} -{"id": 45101, "token": "\u0120Bess", "merges": "\u0120B ess", "count": 415, "type": "remove by frequency"} -{"id": 41379, "token": "\u0120Zar", "merges": "\u0120Z ar", "count": 415, "type": "remove by frequency"} -{"id": 39756, "token": "\u0120CCL", "merges": "\u0120C CL", "count": 415, "type": "remove by frequency"} -{"id": 37061, "token": "\u0120RCC", "merges": "\u0120R CC", "count": 415, "type": "remove by frequency"} -{"id": 25853, "token": "\u0120Princeton", "merges": "\u0120Princ eton", "count": 416, "type": "remove by frequency"} -{"id": 45280, "token": "\u0120Hilton", "merges": "\u0120H ilton", "count": 416, "type": "remove by frequency"} -{"id": 41333, "token": "\u0120Casino", "merges": "\u0120Cas ino", "count": 416, "type": "remove by frequency"} -{"id": 33686, "token": "\u0120Betty", "merges": "\u0120Bet ty", "count": 416, "type": "remove by frequency"} -{"id": 35864, "token": "\u0120LHC", "merges": "\u0120L HC", "count": 416, "type": "remove by frequency"} -{"id": 36498, "token": "\u0120preferentially", "merges": "\u0120pre ferentially", "count": 417, "type": "remove by frequency"} -{"id": 35719, "token": "\u0120antico", "merges": "\u0120ant ico", "count": 417, "type": "remove by frequency"} -{"id": 40047, "token": ")},\\", "merges": ")} ,\\", "count": 417, "type": "remove by frequency"} -{"id": 47090, "token": "\u0120Afterwards", "merges": "\u0120After wards", "count": 418, "type": "remove by frequency"} -{"id": 47246, "token": "\u0120Marvin", "merges": "\u0120Mar vin", "count": 418, "type": "remove by frequency"} -{"id": 46482, "token": "\u0120Pablo", "merges": "\u0120P ablo", "count": 418, "type": "remove by frequency"} -{"id": 7751, "token": "\u0120Iran", "merges": "\u0120I ran", "count": 418, "type": "remove by frequency"} -{"id": 36638, "token": "\u0120Ches", "merges": "\u0120C hes", "count": 418, "type": "remove by frequency"} -{"id": 48550, "token": "\u0120Howe", "merges": "\u0120How e", "count": 418, "type": "remove by frequency"} -{"id": 38890, "token": "\u0120rheumatoid", "merges": "\u0120rheumat oid", "count": 419, "type": "remove by frequency"} -{"id": 32902, "token": "\u0120echocardi", "merges": "\u0120ech ocardi", "count": 419, "type": "remove by frequency"} -{"id": 40381, "token": "\u0120Lots", "merges": "\u0120L ots", "count": 419, "type": "remove by frequency"} -{"id": 48171, "token": "\u0120Yas", "merges": "\u0120Y as", "count": 419, "type": "remove by frequency"} -{"id": 40035, "token": "\u0120NSW", "merges": "\u0120NS W", "count": 419, "type": "remove by frequency"} -{"id": 39481, "token": "//!", "merges": "// !", "count": 419, "type": "remove by frequency"} -{"id": 28888, "token": "\u00e0\u00b3", "merges": "\u00e0 \u00b3", "count": 419, "type": "remove by frequency"} -{"id": 20145, "token": "\u0120Margaret", "merges": "\u0120Marg aret", "count": 420, "type": "remove by frequency"} -{"id": 49398, "token": "\u0120toasted", "merges": "\u0120to asted", "count": 420, "type": "remove by frequency"} -{"id": 17785, "token": "\u0120Greece", "merges": "\u0120Gree ce", "count": 420, "type": "remove by frequency"} -{"id": 18314, "token": "\u0120cytos", "merges": "\u0120cyt os", "count": 420, "type": "remove by frequency"} -{"id": 45887, "token": "\u0120Diaz", "merges": "\u0120D iaz", "count": 420, "type": "remove by frequency"} -{"id": 26189, "token": "\u0120Vik", "merges": "\u0120V ik", "count": 420, "type": "remove by frequency"} -{"id": 30322, "token": "\u0120Rug", "merges": "\u0120R ug", "count": 420, "type": "remove by frequency"} -{"id": 34224, "token": "\u0120suppressor", "merges": "\u0120suppress or", "count": 421, "type": "remove by frequency"} -{"id": 22739, "token": "\u0120Rachel", "merges": "\u0120R achel", "count": 421, "type": "remove by frequency"} -{"id": 40012, "token": "\u0120Archae", "merges": "\u0120Arch ae", "count": 421, "type": "remove by frequency"} -{"id": 32396, "token": "\u0120Lakes", "merges": "\u0120L akes", "count": 421, "type": "remove by frequency"} -{"id": 26467, "token": "\u0120Portuguese", "merges": "\u0120Portug uese", "count": 422, "type": "remove by frequency"} -{"id": 46770, "token": "\u00e3\u0123\u0135\u00e3\u0123\u00a8\u00e3\u0123\u012e", "merges": "\u00e3\u0123\u0135\u00e3\u0123\u00a8 \u00e3\u0123\u012e", "count": 422, "type": "remove by frequency"} -{"id": 44787, "token": "\u0120phenolic", "merges": "\u0120phen olic", "count": 422, "type": "remove by frequency"} -{"id": 51637, "token": "\u00e5\u00bd\u00bf", "merges": "\u00e5\u00bd \u00bf", "count": 422, "type": "remove by frequency"} -{"id": 46296, "token": "\u0120Preliminary", "merges": "\u0120P reliminary", "count": 423, "type": "remove by frequency"} -{"id": 31565, "token": "\u0120Regardless", "merges": "\u0120Reg ardless", "count": 423, "type": "remove by frequency"} -{"id": 22469, "token": "\u0120Obviously", "merges": "\u0120Ob viously", "count": 423, "type": "remove by frequency"} -{"id": 29479, "token": "\u0120systolic", "merges": "\u0120syst olic", "count": 423, "type": "remove by frequency"} -{"id": 49424, "token": "\u0120cations", "merges": "\u0120c ations", "count": 423, "type": "remove by frequency"} -{"id": 50179, "token": "\u0120Sacred", "merges": "\u0120Sac red", "count": 423, "type": "remove by frequency"} -{"id": 37496, "token": "\u0120Standing", "merges": "\u0120Stand ing", "count": 424, "type": "remove by frequency"} -{"id": 48415, "token": "Notable", "merges": "Not able", "count": 424, "type": "remove by frequency"} -{"id": 24707, "token": "\u0120Bobby", "merges": "\u0120Bob by", "count": 424, "type": "remove by frequency"} -{"id": 24639, "token": "\u0120Barcelona", "merges": "\u0120B arcelona", "count": 425, "type": "remove by frequency"} -{"id": 40284, "token": "\u0120hypogly", "merges": "\u0120hyp ogly", "count": 425, "type": "remove by frequency"} -{"id": 34192, "token": "\u0120Castro", "merges": "\u0120Cast ro", "count": 425, "type": "remove by frequency"} -{"id": 31008, "token": "\u00ce\u00bc\u00ce\u0143", "merges": "\u00ce\u00bc \u00ce\u0143", "count": 425, "type": "remove by frequency"} -{"id": 30279, "token": "\u0120dendritic", "merges": "\u0120dend ritic", "count": 426, "type": "remove by frequency"} -{"id": 34819, "token": "\u0120Brent", "merges": "\u0120B rent", "count": 426, "type": "remove by frequency"} -{"id": 47449, "token": "npmjs", "merges": "npm js", "count": 426, "type": "remove by frequency"} -{"id": 49841, "token": "\u0120BCG", "merges": "\u0120B CG", "count": 426, "type": "remove by frequency"} -{"id": 28999, "token": "\u0120immunosupp", "merges": "\u0120immunos upp", "count": 427, "type": "remove by frequency"} -{"id": 40738, "token": "\u0120Roland", "merges": "\u0120Rol and", "count": 427, "type": "remove by frequency"} -{"id": 39652, "token": "\u0120Patrol", "merges": "\u0120Pat rol", "count": 427, "type": "remove by frequency"} -{"id": 17427, "token": "\u0120Manchester", "merges": "\u0120M anchester", "count": 428, "type": "remove by frequency"} -{"id": 21818, "token": "\u0120Melbourne", "merges": "\u0120Mel bourne", "count": 428, "type": "remove by frequency"} -{"id": 45278, "token": "\u0120morphisms", "merges": "\u0120morph isms", "count": 428, "type": "remove by frequency"} -{"id": 35445, "token": "\u0120Goldman", "merges": "\u0120Gold man", "count": 428, "type": "remove by frequency"} -{"id": 45323, "token": "\u0120Dodge", "merges": "\u0120D odge", "count": 428, "type": "remove by frequency"} -{"id": 42796, "token": "\u0120Flint", "merges": "\u0120Fl int", "count": 428, "type": "remove by frequency"} -{"id": 49143, "token": "\u0120Consc", "merges": "\u0120Cons c", "count": 428, "type": "remove by frequency"} -{"id": 36476, "token": "\u0120cutaneous", "merges": "\u0120cut aneous", "count": 429, "type": "remove by frequency"} -{"id": 36729, "token": "\u0120Charter", "merges": "\u0120Char ter", "count": 429, "type": "remove by frequency"} -{"id": 46205, "token": "\u0120mitral", "merges": "\u0120mit ral", "count": 429, "type": "remove by frequency"} -{"id": 48659, "token": "\u0120Barker", "merges": "\u0120B arker", "count": 429, "type": "remove by frequency"} -{"id": 37013, "token": "\u0120immuno", "merges": "\u0120immun o", "count": 429, "type": "remove by frequency"} -{"id": 42486, "token": "\u0120Tate", "merges": "\u0120T ate", "count": 429, "type": "remove by frequency"} -{"id": 33622, "token": "\u0120Nobel", "merges": "\u0120Nob el", "count": 430, "type": "remove by frequency"} -{"id": 34970, "token": "\u0120Sic", "merges": "\u0120S ic", "count": 430, "type": "remove by frequency"} -{"id": 29890, "token": "{(\\", "merges": "{ (\\", "count": 430, "type": "remove by frequency"} -{"id": 48015, "token": "\u0120Stephens", "merges": "\u0120Stephen s", "count": 431, "type": "remove by frequency"} -{"id": 32106, "token": "\u0120Sherman", "merges": "\u0120Sher man", "count": 431, "type": "remove by frequency"} -{"id": 22755, "token": "\u0120Chile", "merges": "\u0120Ch ile", "count": 431, "type": "remove by frequency"} -{"id": 50189, "token": "\u0120Hitch", "merges": "\u0120H itch", "count": 431, "type": "remove by frequency"} -{"id": 31513, "token": "\u0120Joel", "merges": "\u0120Jo el", "count": 431, "type": "remove by frequency"} -{"id": 48921, "token": "en\u00c3\u0143", "merges": "en \u00c3\u0143", "count": 431, "type": "remove by frequency"} -{"id": 42824, "token": "l\u00c3\u00a4", "merges": "l \u00c3\u00a4", "count": 431, "type": "remove by frequency"} -{"id": 28634, "token": "\u0120Metropolitan", "merges": "\u0120Met ropolitan", "count": 432, "type": "remove by frequency"} -{"id": 29166, "token": "Invitrogen", "merges": "Inv itrogen", "count": 432, "type": "remove by frequency"} -{"id": 20882, "token": "\u0120Danny", "merges": "\u0120D anny", "count": 432, "type": "remove by frequency"} -{"id": 41596, "token": "\u0120otros", "merges": "\u0120ot ros", "count": 432, "type": "remove by frequency"} -{"id": 40399, "token": "\u0120\u00ce\u0142", "merges": "\u0120\u00ce \u0142", "count": 432, "type": "remove by frequency"} -{"id": 53030, "token": "\u00e7\u013f\u00aa", "merges": "\u00e7\u013f \u00aa", "count": 432, "type": "remove by frequency"} -{"id": 22379, "token": "\u00e0\u00b1", "merges": "\u00e0 \u00b1", "count": 432, "type": "remove by frequency"} -{"id": 37314, "token": "\u0120pornography", "merges": "\u0120porn ography", "count": 433, "type": "remove by frequency"} -{"id": 32952, "token": "\u0120Bloomberg", "merges": "\u0120Bloom berg", "count": 433, "type": "remove by frequency"} -{"id": 28175, "token": "CUSSION", "merges": "CUSS ION", "count": 433, "type": "remove by frequency"} -{"id": 34530, "token": "\u0120Natal", "merges": "\u0120N atal", "count": 433, "type": "remove by frequency"} -{"id": 38899, "token": "\u0120Lund", "merges": "\u0120L und", "count": 433, "type": "remove by frequency"} -{"id": 6401, "token": "\u0120/**", "merges": "\u0120/ **", "count": 433, "type": "remove by frequency"} -{"id": 48451, "token": "\u00d5\u00a5", "merges": "\u00d5 \u00a5", "count": 433, "type": "remove by frequency"} -{"id": 41283, "token": "\u0120Tradition", "merges": "\u0120Tr adition", "count": 434, "type": "remove by frequency"} -{"id": 33341, "token": "\u0120Behind", "merges": "\u0120Be hind", "count": 434, "type": "remove by frequency"} -{"id": 26635, "token": "...](", "merges": "... ](", "count": 434, "type": "remove by frequency"} -{"id": 27709, "token": "\u0120Yale", "merges": "\u0120Y ale", "count": 434, "type": "remove by frequency"} -{"id": 41995, "token": "Whoa", "merges": "Who a", "count": 434, "type": "remove by frequency"} -{"id": 30192, "token": "\u0120asymptomatic", "merges": "\u0120asympt omatic", "count": 435, "type": "remove by frequency"} -{"id": 20956, "token": "\u0120Portland", "merges": "\u0120Port land", "count": 435, "type": "remove by frequency"} -{"id": 12194, "token": "\u0120Soviet", "merges": "\u0120Sov iet", "count": 435, "type": "remove by frequency"} -{"id": 34722, "token": "\u0120Trent", "merges": "\u0120T rent", "count": 435, "type": "remove by frequency"} -{"id": 40081, "token": "\u0120Quin", "merges": "\u0120Qu in", "count": 435, "type": "remove by frequency"} -{"id": 46766, "token": "\u00c3\u00bcnd", "merges": "\u00c3\u00bc nd", "count": 435, "type": "remove by frequency"} -{"id": 6319, "token": "\u00c2\u0143", "merges": "\u00c2 \u0143", "count": 435, "type": "remove by frequency"} -{"id": 36841, "token": "\u0120Benedict", "merges": "\u0120Bened ict", "count": 436, "type": "remove by frequency"} -{"id": 25260, "token": "\u0120Hebrew", "merges": "\u0120He brew", "count": 436, "type": "remove by frequency"} -{"id": 40200, "token": "\u0120andere", "merges": "\u0120and ere", "count": 436, "type": "remove by frequency"} -{"id": 17979, "token": "\u0120Vice", "merges": "\u0120V ice", "count": 436, "type": "remove by frequency"} -{"id": 9793, "token": "\u0120$[", "merges": "\u0120$ [", "count": 436, "type": "remove by frequency"} -{"id": 25641, "token": "\u0120homeostasis", "merges": "\u0120homeost asis", "count": 437, "type": "remove by frequency"} -{"id": 29986, "token": "\u0120Significant", "merges": "\u0120Sign ificant", "count": 437, "type": "remove by frequency"} -{"id": 31920, "token": "\u0120squamous", "merges": "\u0120squ amous", "count": 437, "type": "remove by frequency"} -{"id": 35530, "token": "\u0120fermion", "merges": "\u0120ferm ion", "count": 437, "type": "remove by frequency"} -{"id": 20758, "token": "CLUDING", "merges": "CLUD ING", "count": 437, "type": "remove by frequency"} -{"id": 39107, "token": "\u0120Claude", "merges": "\u0120Cla ude", "count": 437, "type": "remove by frequency"} -{"id": 19669, "token": "\u0120Billy", "merges": "\u0120B illy", "count": 437, "type": "remove by frequency"} -{"id": 45448, "token": "\u0120r\u00c3\u00a9p", "merges": "\u0120r\u00c3\u00a9 p", "count": 437, "type": "remove by frequency"} -{"id": 39037, "token": "\u0120Sew", "merges": "\u0120S ew", "count": 437, "type": "remove by frequency"} -{"id": 47111, "token": "\u0120Byr", "merges": "\u0120By r", "count": 437, "type": "remove by frequency"} -{"id": 42192, "token": "\u0120probative", "merges": "\u0120prob ative", "count": 438, "type": "remove by frequency"} -{"id": 43914, "token": "\u0120Families", "merges": "\u0120F amilies", "count": 438, "type": "remove by frequency"} -{"id": 41057, "token": "\u0120ainsi", "merges": "\u0120a insi", "count": 438, "type": "remove by frequency"} -{"id": 36253, "token": "\u0120Jill", "merges": "\u0120J ill", "count": 438, "type": "remove by frequency"} -{"id": 18537, "token": "\u00e0\u00a8", "merges": "\u00e0 \u00a8", "count": 438, "type": "remove by frequency"} -{"id": 48775, "token": "\u0120capricious", "merges": "\u0120capric ious", "count": 439, "type": "remove by frequency"} -{"id": 24641, "token": "\u0120albumin", "merges": "\u0120album in", "count": 439, "type": "remove by frequency"} -{"id": 18816, "token": "\u0120Hitler", "merges": "\u0120Hit ler", "count": 439, "type": "remove by frequency"} -{"id": 44948, "token": "\u0120Comedy", "merges": "\u0120Com edy", "count": 439, "type": "remove by frequency"} -{"id": 15924, "token": "\u0120Balt", "merges": "\u0120B alt", "count": 439, "type": "remove by frequency"} -{"id": 42870, "token": "\u0120Mayo", "merges": "\u0120May o", "count": 439, "type": "remove by frequency"} -{"id": 32443, "token": "\u0120\u00d9\u012c", "merges": "\u0120 \u00d9\u012c", "count": 439, "type": "remove by frequency"} -{"id": 50062, "token": "\u00d5\u00b8", "merges": "\u00d5 \u00b8", "count": 439, "type": "remove by frequency"} -{"id": 29474, "token": "\u00e3\u0123\u00a8\u00e3\u0123\u0126\u00e3\u0123\u0128", "merges": "\u00e3\u0123\u00a8\u00e3\u0123\u0126 \u00e3\u0123\u0128", "count": 440, "type": "remove by frequency"} -{"id": 48585, "token": "\u0120Fleming", "merges": "\u0120Fle ming", "count": 440, "type": "remove by frequency"} -{"id": 28307, "token": "\u0120acidic", "merges": "\u0120acid ic", "count": 440, "type": "remove by frequency"} -{"id": 46550, "token": "j\u00c3\u00a4", "merges": "j \u00c3\u00a4", "count": 440, "type": "remove by frequency"} -{"id": 48261, "token": "\u0120Goldberg", "merges": "\u0120Gold berg", "count": 441, "type": "remove by frequency"} -{"id": 28180, "token": "\u0120synthase", "merges": "\u0120synth ase", "count": 441, "type": "remove by frequency"} -{"id": 36562, "token": "\u0120Lastly", "merges": "\u0120Last ly", "count": 441, "type": "remove by frequency"} -{"id": 48125, "token": "\u0120Denis", "merges": "\u0120Den is", "count": 441, "type": "remove by frequency"} -{"id": 25635, "token": "\u0120Ruth", "merges": "\u0120R uth", "count": 441, "type": "remove by frequency"} -{"id": 44422, "token": "\u0120CURL", "merges": "\u0120C URL", "count": 441, "type": "remove by frequency"} -{"id": 29354, "token": "\u00cf\u0123\u00ce\u00ac", "merges": "\u00cf\u0123 \u00ce\u00ac", "count": 441, "type": "remove by frequency"} -{"id": 15734, "token": ")}{\\", "merges": ") }{\\", "count": 441, "type": "remove by frequency"} -{"id": 46666, "token": "\u00c5\u00bce", "merges": "\u00c5\u00bc e", "count": 441, "type": "remove by frequency"} -{"id": 49205, "token": "\u0120Psychological", "merges": "\u0120Psych ological", "count": 442, "type": "remove by frequency"} -{"id": 47824, "token": "\u0120Barton", "merges": "\u0120Bart on", "count": 442, "type": "remove by frequency"} -{"id": 37751, "token": "\u0120leptin", "merges": "\u0120lept in", "count": 442, "type": "remove by frequency"} -{"id": 31755, "token": "\u0120Logan", "merges": "\u0120Log an", "count": 442, "type": "remove by frequency"} -{"id": 42073, "token": "\u0120Payne", "merges": "\u0120Pay ne", "count": 442, "type": "remove by frequency"} -{"id": 44146, "token": "\u0120chemo", "merges": "\u0120chem o", "count": 442, "type": "remove by frequency"} -{"id": 24125, "token": "\u0120~*", "merges": "\u0120~ *", "count": 442, "type": "remove by frequency"} -{"id": 17765, "token": "){\\", "merges": ") {\\", "count": 442, "type": "remove by frequency"} -{"id": 33767, "token": "\u0120diastolic", "merges": "\u0120di astolic", "count": 443, "type": "remove by frequency"} -{"id": 32863, "token": "\u0120Hungarian", "merges": "\u0120Hung arian", "count": 443, "type": "remove by frequency"} -{"id": 36839, "token": "\u0120Cemetery", "merges": "\u0120C emetery", "count": 443, "type": "remove by frequency"} -{"id": 8308, "token": "\u0120Minister", "merges": "\u0120Min ister", "count": 443, "type": "remove by frequency"} -{"id": 38437, "token": "\u0120Johnston", "merges": "\u0120John ston", "count": 443, "type": "remove by frequency"} -{"id": 36097, "token": "\u0120Jenny", "merges": "\u0120J enny", "count": 443, "type": "remove by frequency"} -{"id": 25433, "token": "\u0120haben", "merges": "\u0120hab en", "count": 443, "type": "remove by frequency"} -{"id": 47800, "token": "\u0120USS", "merges": "\u0120US S", "count": 443, "type": "remove by frequency"} -{"id": 36048, "token": "Nevertheless", "merges": "Never theless", "count": 444, "type": "remove by frequency"} -{"id": 39035, "token": "\u0120Franco", "merges": "\u0120Franc o", "count": 444, "type": "remove by frequency"} -{"id": 37194, "token": "\u0120cystic", "merges": "\u0120cy stic", "count": 444, "type": "remove by frequency"} -{"id": 22646, "token": "\u0120MMP", "merges": "\u0120M MP", "count": 444, "type": "remove by frequency"} -{"id": 32209, "token": "\u0120MHC", "merges": "\u0120M HC", "count": 444, "type": "remove by frequency"} -{"id": 46276, "token": "h\u00c3\u00a4", "merges": "h \u00c3\u00a4", "count": 444, "type": "remove by frequency"} -{"id": 30021, "token": "\u0120Baptist", "merges": "\u0120Bapt ist", "count": 445, "type": "remove by frequency"} -{"id": 46681, "token": "\u0120Murder", "merges": "\u0120Mur der", "count": 445, "type": "remove by frequency"} -{"id": 35213, "token": "\u0120Sister", "merges": "\u0120S ister", "count": 445, "type": "remove by frequency"} -{"id": 33609, "token": "\u0120Dul", "merges": "\u0120D ul", "count": 445, "type": "remove by frequency"} -{"id": 43881, "token": "\u0120prostitution", "merges": "\u0120pro stitution", "count": 446, "type": "remove by frequency"} -{"id": 26132, "token": "\u0120biosynthesis", "merges": "\u0120bios ynthesis", "count": 446, "type": "remove by frequency"} -{"id": 41926, "token": "\u0120Fitzgerald", "merges": "\u0120Fitz gerald", "count": 446, "type": "remove by frequency"} -{"id": 26653, "token": "\u0120Amsterdam", "merges": "\u0120Am sterdam", "count": 446, "type": "remove by frequency"} -{"id": 35346, "token": "\u0120Firstly", "merges": "\u0120First ly", "count": 446, "type": "remove by frequency"} -{"id": 43457, "token": "\u0120acuity", "merges": "\u0120ac uity", "count": 446, "type": "remove by frequency"} -{"id": 37760, "token": "\u00d9\u0129\u00d8\u00a7", "merges": "\u00d9\u0129 \u00d8\u00a7", "count": 446, "type": "remove by frequency"} -{"id": 45691, "token": "\u0120Cunningham", "merges": "\u0120C unningham", "count": 447, "type": "remove by frequency"} -{"id": 39369, "token": "\u0120Combining", "merges": "\u0120Comb ining", "count": 447, "type": "remove by frequency"} -{"id": 31563, "token": "\u0120Vladimir", "merges": "\u0120Vlad imir", "count": 447, "type": "remove by frequency"} -{"id": 33315, "token": "\u0120Greens", "merges": "\u0120G reens", "count": 447, "type": "remove by frequency"} -{"id": 45374, "token": "\u0120Cran", "merges": "\u0120C ran", "count": 447, "type": "remove by frequency"} -{"id": 34278, "token": "\u0120affidavits", "merges": "\u0120affidav its", "count": 448, "type": "remove by frequency"} -{"id": 40879, "token": "\u0120Ernest", "merges": "\u0120Ern est", "count": 448, "type": "remove by frequency"} -{"id": 33825, "token": "\u0120redox", "merges": "\u0120red ox", "count": 448, "type": "remove by frequency"} -{"id": 42385, "token": "\u0120Basil", "merges": "\u0120Bas il", "count": 448, "type": "remove by frequency"} -{"id": 32920, "token": "\u0120Riley", "merges": "\u0120R iley", "count": 448, "type": "remove by frequency"} -{"id": 28177, "token": "\u0120Amer", "merges": "\u0120A mer", "count": 448, "type": "remove by frequency"} -{"id": 21227, "token": "\u00cf\u0126\u00ce\u00b9", "merges": "\u00cf\u0126 \u00ce\u00b9", "count": 448, "type": "remove by frequency"} -{"id": 47226, "token": "\u0120laterally", "merges": "\u0120later ally", "count": 449, "type": "remove by frequency"} -{"id": 29204, "token": "\u0120glutamate", "merges": "\u0120glut amate", "count": 449, "type": "remove by frequency"} -{"id": 45776, "token": "\u00e3\u0123\u00a7\u00e3\u0124\u0124", "merges": "\u00e3\u0123\u00a7 \u00e3\u0124\u0124", "count": 449, "type": "remove by frequency"} -{"id": 50089, "token": "ostomy", "merges": "ost omy", "count": 449, "type": "remove by frequency"} -{"id": 33240, "token": "\u0120Diana", "merges": "\u0120D iana", "count": 449, "type": "remove by frequency"} -{"id": 30864, "token": "\u0120Drew", "merges": "\u0120D rew", "count": 449, "type": "remove by frequency"} -{"id": 37888, "token": "\u0120WWE", "merges": "\u0120W WE", "count": 449, "type": "remove by frequency"} -{"id": 51376, "token": "\u00e5\u0143\u0125", "merges": "\u00e5\u0143 \u0125", "count": 449, "type": "remove by frequency"} -{"id": 41222, "token": "\u0120analogues", "merges": "\u0120analog ues", "count": 450, "type": "remove by frequency"} -{"id": 38607, "token": "\u0120inoculation", "merges": "\u0120inoc ulation", "count": 451, "type": "remove by frequency"} -{"id": 30778, "token": "\u0120Imagine", "merges": "\u0120Im agine", "count": 451, "type": "remove by frequency"} -{"id": 21753, "token": "\u0120Austria", "merges": "\u0120Aust ria", "count": 451, "type": "remove by frequency"} -{"id": 48929, "token": "\u0120futuro", "merges": "\u0120fut uro", "count": 451, "type": "remove by frequency"} -{"id": 37109, "token": "omethyl", "merges": "om ethyl", "count": 451, "type": "remove by frequency"} -{"id": 49693, "token": "\u0120Surviv", "merges": "\u0120Surv iv", "count": 451, "type": "remove by frequency"} -{"id": 11596, "token": "\u0120Irish", "merges": "\u0120Ir ish", "count": 451, "type": "remove by frequency"} -{"id": 38815, "token": "ASHINGTON", "merges": "ASH INGTON", "count": 452, "type": "remove by frequency"} -{"id": 27141, "token": "\u0120Baseball", "merges": "\u0120Base ball", "count": 452, "type": "remove by frequency"} -{"id": 41087, "token": "\u0120Herman", "merges": "\u0120H erman", "count": 452, "type": "remove by frequency"} -{"id": 30155, "token": "Figs", "merges": "Fig s", "count": 452, "type": "remove by frequency"} -{"id": 43935, "token": "\u0120Tonight", "merges": "\u0120Ton ight", "count": 453, "type": "remove by frequency"} -{"id": 34177, "token": "\u0120neopl", "merges": "\u0120ne opl", "count": 453, "type": "remove by frequency"} -{"id": 53750, "token": "\u00e8\u00a6\u0129", "merges": "\u00e8\u00a6 \u0129", "count": 453, "type": "remove by frequency"} -{"id": 33157, "token": "\u0120Renaissance", "merges": "\u0120Rena issance", "count": 454, "type": "remove by frequency"} -{"id": 41865, "token": "\u0120\u00c3\u00a9gal", "merges": "\u0120\u00c3\u00a9 gal", "count": 454, "type": "remove by frequency"} -{"id": 43695, "token": "\u0120chez", "merges": "\u0120che z", "count": 454, "type": "remove by frequency"} -{"id": 44009, "token": "k\u00c3\u00a9", "merges": "k \u00c3\u00a9", "count": 454, "type": "remove by frequency"} -{"id": 35266, "token": "\u0120Municipal", "merges": "\u0120Municip al", "count": 455, "type": "remove by frequency"} -{"id": 34313, "token": "\u0120Matthews", "merges": "\u0120Matthe ws", "count": 455, "type": "remove by frequency"} -{"id": 45648, "token": "\u0120nmol", "merges": "\u0120nm ol", "count": 455, "type": "remove by frequency"} -{"id": 23207, "token": "\u0120\u00d8\u00a3", "merges": "\u0120\u00d8 \u00a3", "count": 455, "type": "remove by frequency"} -{"id": 48671, "token": "\u0120nucleation", "merges": "\u0120nucle ation", "count": 456, "type": "remove by frequency"} -{"id": 49413, "token": "\u0120adsorbed", "merges": "\u0120adsor bed", "count": 456, "type": "remove by frequency"} -{"id": 23618, "token": "\u0120hypoxia", "merges": "\u0120hypox ia", "count": 456, "type": "remove by frequency"} -{"id": 38071, "token": "\u0120Irving", "merges": "\u0120Ir ving", "count": 456, "type": "remove by frequency"} -{"id": 32772, "token": "\u0120ADHD", "merges": "\u0120AD HD", "count": 456, "type": "remove by frequency"} -{"id": 38856, "token": "\u0120Pale", "merges": "\u0120P ale", "count": 456, "type": "remove by frequency"} -{"id": 49039, "token": "\u0120Ways", "merges": "\u0120W ays", "count": 456, "type": "remove by frequency"} -{"id": 36893, "token": "__>", "merges": "__ >", "count": 456, "type": "remove by frequency"} -{"id": 41919, "token": "\u0120cultivars", "merges": "\u0120cultiv ars", "count": 457, "type": "remove by frequency"} -{"id": 35852, "token": "\u0120Diseases", "merges": "\u0120Dise ases", "count": 457, "type": "remove by frequency"} -{"id": 44452, "token": "\u0120Shepherd", "merges": "\u0120She pherd", "count": 457, "type": "remove by frequency"} -{"id": 28982, "token": "\u0120Martinez", "merges": "\u0120Mart inez", "count": 457, "type": "remove by frequency"} -{"id": 21739, "token": "\u0120Liberal", "merges": "\u0120Lib eral", "count": 457, "type": "remove by frequency"} -{"id": 36262, "token": "\u0120Customs", "merges": "\u0120Custom s", "count": 457, "type": "remove by frequency"} -{"id": 14857, "token": "\u0120TNF", "merges": "\u0120T NF", "count": 457, "type": "remove by frequency"} -{"id": 27825, "token": "}}-", "merges": "}} -", "count": 457, "type": "remove by frequency"} -{"id": 27476, "token": "\u0120Figures", "merges": "\u0120Fig ures", "count": 458, "type": "remove by frequency"} -{"id": 36362, "token": "\u0120Eugene", "merges": "\u0120Eug ene", "count": 458, "type": "remove by frequency"} -{"id": 40466, "token": "\u0120cloves", "merges": "\u0120cl oves", "count": 458, "type": "remove by frequency"} -{"id": 9658, "token": "\u0120Among", "merges": "\u0120Am ong", "count": 458, "type": "remove by frequency"} -{"id": 46892, "token": "\u0120prong", "merges": "\u0120pr ong", "count": 458, "type": "remove by frequency"} -{"id": 50080, "token": "\u0120foraging", "merges": "\u0120for aging", "count": 459, "type": "remove by frequency"} -{"id": 29677, "token": "\u0120Aber", "merges": "\u0120A ber", "count": 459, "type": "remove by frequency"} -{"id": 41397, "token": "\u0120Zur", "merges": "\u0120Z ur", "count": 459, "type": "remove by frequency"} -{"id": 41848, "token": "\u0120Hag", "merges": "\u0120H ag", "count": 459, "type": "remove by frequency"} -{"id": 24738, "token": "\u0120metastases", "merges": "\u0120metast ases", "count": 460, "type": "remove by frequency"} -{"id": 42042, "token": "\u0120\u00e0\u00a4\u00aa", "merges": "\u0120\u00e0\u00a4 \u00aa", "count": 460, "type": "remove by frequency"} -{"id": 32760, "token": "\u0120Wildlife", "merges": "\u0120Wild life", "count": 461, "type": "remove by frequency"} -{"id": 42526, "token": "\u0120stably", "merges": "\u0120st ably", "count": 461, "type": "remove by frequency"} -{"id": 37702, "token": "\u0120Notre", "merges": "\u0120Not re", "count": 461, "type": "remove by frequency"} -{"id": 48855, "token": "\u0120Garn", "merges": "\u0120G arn", "count": 461, "type": "remove by frequency"} -{"id": 42964, "token": "\u0120meer", "merges": "\u0120me er", "count": 461, "type": "remove by frequency"} -{"id": 24891, "token": "\u0120coronavirus", "merges": "\u0120coron avirus", "count": 462, "type": "remove by frequency"} -{"id": 33226, "token": "\u0120antagonists", "merges": "\u0120antagon ists", "count": 462, "type": "remove by frequency"} -{"id": 18242, "token": "\u0120plasmid", "merges": "\u0120plasm id", "count": 462, "type": "remove by frequency"} -{"id": 36200, "token": "\u0120Legion", "merges": "\u0120Leg ion", "count": 462, "type": "remove by frequency"} -{"id": 15869, "token": "mapsto", "merges": "map sto", "count": 462, "type": "remove by frequency"} -{"id": 36297, "token": "\u0120Boyd", "merges": "\u0120Boy d", "count": 462, "type": "remove by frequency"} -{"id": 45213, "token": "\u0120IgA", "merges": "\u0120Ig A", "count": 462, "type": "remove by frequency"} -{"id": 46329, "token": "\u0120Constantin", "merges": "\u0120Constant in", "count": 463, "type": "remove by frequency"} -{"id": 23425, "token": "\u0120Catherine", "merges": "\u0120C atherine", "count": 463, "type": "remove by frequency"} -{"id": 35798, "token": "\u0120Brock", "merges": "\u0120Bro ck", "count": 463, "type": "remove by frequency"} -{"id": 29051, "token": "\u0120Quinn", "merges": "\u0120Qu inn", "count": 463, "type": "remove by frequency"} -{"id": 20429, "token": "mathit", "merges": "math it", "count": 463, "type": "remove by frequency"} -{"id": 34471, "token": "\u0120Mama", "merges": "\u0120M ama", "count": 463, "type": "remove by frequency"} -{"id": 21142, "token": "\u0120appellants", "merges": "\u0120appell ants", "count": 464, "type": "remove by frequency"} -{"id": 49731, "token": "\u0120DEAL", "merges": "\u0120DE AL", "count": 464, "type": "remove by frequency"} -{"id": 45722, "token": "\u0120Miz", "merges": "\u0120M iz", "count": 464, "type": "remove by frequency"} -{"id": 35776, "token": "\u0120Ful", "merges": "\u0120F ul", "count": 464, "type": "remove by frequency"} -{"id": 40380, "token": "\u00c3\u00b6k", "merges": "\u00c3\u00b6 k", "count": 464, "type": "remove by frequency"} -{"id": 46933, "token": "\u0120Planned", "merges": "\u0120Pl anned", "count": 465, "type": "remove by frequency"} -{"id": 36041, "token": "\u0120AMER", "merges": "\u0120AM ER", "count": 465, "type": "remove by frequency"} -{"id": 40019, "token": "\u0120Hale", "merges": "\u0120H ale", "count": 465, "type": "remove by frequency"} -{"id": 30823, "token": "\u0120lipoprotein", "merges": "\u0120lip oprotein", "count": 466, "type": "remove by frequency"} -{"id": 48322, "token": "\u0120Theoretical", "merges": "\u0120The oretical", "count": 466, "type": "remove by frequency"} -{"id": 36105, "token": "\u0120Patterson", "merges": "\u0120Pat terson", "count": 466, "type": "remove by frequency"} -{"id": 37103, "token": "\u0120Baldwin", "merges": "\u0120Bald win", "count": 466, "type": "remove by frequency"} -{"id": 22926, "token": "\u0120chiral", "merges": "\u0120ch iral", "count": 466, "type": "remove by frequency"} -{"id": 27061, "token": "\u0120Tibet", "merges": "\u0120Tib et", "count": 466, "type": "remove by frequency"} -{"id": 43484, "token": "\u0120Gul", "merges": "\u0120G ul", "count": 466, "type": "remove by frequency"} -{"id": 27349, "token": "\u0120MDA", "merges": "\u0120M DA", "count": 466, "type": "remove by frequency"} -{"id": 14457, "token": "bibr", "merges": "bib r", "count": 466, "type": "remove by frequency"} -{"id": 49502, "token": "\u0120Hoe", "merges": "\u0120H oe", "count": 466, "type": "remove by frequency"} -{"id": 20524, "token": "\u0120FITNESS", "merges": "\u0120FIT NESS", "count": 467, "type": "remove by frequency"} -{"id": 20767, "token": "\u0120!==", "merges": "\u0120! ==", "count": 467, "type": "remove by frequency"} -{"id": 36399, "token": ")\\[", "merges": ") \\[", "count": 467, "type": "remove by frequency"} -{"id": 35558, "token": "Abbreviations", "merges": "Ab breviations", "count": 468, "type": "remove by frequency"} -{"id": 33968, "token": "\u0120Differences", "merges": "\u0120D ifferences", "count": 468, "type": "remove by frequency"} -{"id": 39366, "token": "ivariable", "merges": "iv ariable", "count": 468, "type": "remove by frequency"} -{"id": 48021, "token": "\u0120Norris", "merges": "\u0120Nor ris", "count": 468, "type": "remove by frequency"} -{"id": 41171, "token": "\u0120gente", "merges": "\u0120gent e", "count": 469, "type": "remove by frequency"} -{"id": 45075, "token": "glich", "merges": "gl ich", "count": 469, "type": "remove by frequency"} -{"id": 17578, "token": "\u0120GFP", "merges": "\u0120G FP", "count": 469, "type": "remove by frequency"} -{"id": 41559, "token": "\u0120immobilized", "merges": "\u0120immob ilized", "count": 470, "type": "remove by frequency"} -{"id": 37125, "token": "\u0120Babylon", "merges": "\u0120Bab ylon", "count": 470, "type": "remove by frequency"} -{"id": 20463, "token": "\u0120ett\u00c3\u00a4", "merges": "\u0120ett \u00c3\u00a4", "count": 470, "type": "remove by frequency"} -{"id": 49019, "token": "\u0120Locke", "merges": "\u0120Loc ke", "count": 470, "type": "remove by frequency"} -{"id": 44302, "token": "\u0120Zimm", "merges": "\u0120Z imm", "count": 470, "type": "remove by frequency"} -{"id": 46511, "token": "\u0120Gunn", "merges": "\u0120G unn", "count": 470, "type": "remove by frequency"} -{"id": 21159, "token": "\u0120}{", "merges": "\u0120} {", "count": 470, "type": "remove by frequency"} -{"id": 44945, "token": "\u0120Lancaster", "merges": "\u0120Lanc aster", "count": 471, "type": "remove by frequency"} -{"id": 29841, "token": "\u0120apical", "merges": "\u0120ap ical", "count": 471, "type": "remove by frequency"} -{"id": 48954, "token": "\u0120Oval", "merges": "\u0120O val", "count": 471, "type": "remove by frequency"} -{"id": 46410, "token": "()", "merges": "() \">", "count": 608, "type": "remove by frequency"} -{"id": 49922, "token": "\u0120mindfulness", "merges": "\u0120mind fulness", "count": 609, "type": "remove by frequency"} -{"id": 39602, "token": "\u0120Hoffman", "merges": "\u0120Hoff man", "count": 609, "type": "remove by frequency"} -{"id": 50082, "token": "\u0120snar", "merges": "\u0120sn ar", "count": 609, "type": "remove by frequency"} -{"id": 46689, "token": "\u0120Hed", "merges": "\u0120H ed", "count": 609, "type": "remove by frequency"} -{"id": 39283, "token": "AtA", "merges": "At A", "count": 609, "type": "remove by frequency"} -{"id": 50363, "token": "\u00e2\u0138\u0130", "merges": "\u00e2\u0138 \u0130", "count": 609, "type": "remove by frequency"} -{"id": 24848, "token": "\u0120polymorphism", "merges": "\u0120polymorph ism", "count": 610, "type": "remove by frequency"} -{"id": 28852, "token": "\u0120Barnes", "merges": "\u0120Barn es", "count": 610, "type": "remove by frequency"} -{"id": 40278, "token": "\u0120Toast", "merges": "\u0120To ast", "count": 610, "type": "remove by frequency"} -{"id": 48405, "token": "\u0120Shall", "merges": "\u0120Sh all", "count": 610, "type": "remove by frequency"} -{"id": 46315, "token": "\u0120Gesch", "merges": "\u0120G esch", "count": 610, "type": "remove by frequency"} -{"id": 33489, "token": "\u0120Peninsula", "merges": "\u0120Pen insula", "count": 611, "type": "remove by frequency"} -{"id": 4689, "token": "}_{\\", "merges": "} _{\\", "count": 611, "type": "remove by frequency"} -{"id": 45588, "token": "\u0120suo", "merges": "\u0120su o", "count": 611, "type": "remove by frequency"} -{"id": 18485, "token": "\u0120GOP", "merges": "\u0120G OP", "count": 611, "type": "remove by frequency"} -{"id": 26150, "token": "\u0120Lagrangian", "merges": "\u0120Lag rangian", "count": 612, "type": "remove by frequency"} -{"id": 19011, "token": "ellants", "merges": "ell ants", "count": 612, "type": "remove by frequency"} -{"id": 33003, "token": "\u0120Dame", "merges": "\u0120D ame", "count": 612, "type": "remove by frequency"} -{"id": 41069, "token": "\u0120HBO", "merges": "\u0120H BO", "count": 612, "type": "remove by frequency"} -{"id": 29611, "token": "\u0120silencing", "merges": "\u0120sil encing", "count": 613, "type": "remove by frequency"} -{"id": 48505, "token": "\u0120Giovanni", "merges": "\u0120Gi ovanni", "count": 613, "type": "remove by frequency"} -{"id": 39836, "token": "\u0120Intra", "merges": "\u0120Int ra", "count": 613, "type": "remove by frequency"} -{"id": 43767, "token": "\u0120Yard", "merges": "\u0120Y ard", "count": 613, "type": "remove by frequency"} -{"id": 50233, "token": "\u0120StringBuilder", "merges": "\u0120String Builder", "count": 614, "type": "remove by frequency"} -{"id": 32833, "token": "\u0120CONTRACT", "merges": "\u0120CONTR ACT", "count": 614, "type": "remove by frequency"} -{"id": 49253, "token": "\u0120scler", "merges": "\u0120s cler", "count": 614, "type": "remove by frequency"} -{"id": 41660, "token": "\u0120Herz", "merges": "\u0120Her z", "count": 614, "type": "remove by frequency"} -{"id": 24520, "token": "\u0120Eur", "merges": "\u0120E ur", "count": 614, "type": "remove by frequency"} -{"id": 32101, "token": "\u0120McCarthy", "merges": "\u0120McC arthy", "count": 615, "type": "remove by frequency"} -{"id": 34626, "token": "\u0120intram", "merges": "\u0120int ram", "count": 615, "type": "remove by frequency"} -{"id": 36292, "token": "\u0120Edgar", "merges": "\u0120Ed gar", "count": 615, "type": "remove by frequency"} -{"id": 26062, "token": "\u0120Dennis", "merges": "\u0120D ennis", "count": 616, "type": "remove by frequency"} -{"id": 35608, "token": "\u0120quarks", "merges": "\u0120qu arks", "count": 616, "type": "remove by frequency"} -{"id": 11474, "token": "\u0120Though", "merges": "\u0120Th ough", "count": 616, "type": "remove by frequency"} -{"id": 45773, "token": "\u0120Eliot", "merges": "\u0120Eli ot", "count": 616, "type": "remove by frequency"} -{"id": 20164, "token": "\u0120Jimmy", "merges": "\u0120Jim my", "count": 616, "type": "remove by frequency"} -{"id": 35194, "token": "\u0120Said", "merges": "\u0120Sa id", "count": 616, "type": "remove by frequency"} -{"id": 40000, "token": "/>.", "merges": "/> .", "count": 616, "type": "remove by frequency"} -{"id": 47063, "token": "\u0120primates", "merges": "\u0120prim ates", "count": 617, "type": "remove by frequency"} -{"id": 35595, "token": "\u0120p\u00c3\u00bablic", "merges": "\u0120p \u00c3\u00bablic", "count": 617, "type": "remove by frequency"} -{"id": 34400, "token": "kubuntu", "merges": "k ubuntu", "count": 617, "type": "remove by frequency"} -{"id": 23662, "token": "\u0120Nathan", "merges": "\u0120N athan", "count": 617, "type": "remove by frequency"} -{"id": 34534, "token": "\u0120Witt", "merges": "\u0120W itt", "count": 617, "type": "remove by frequency"} -{"id": 45851, "token": "\u0120Mage", "merges": "\u0120M age", "count": 617, "type": "remove by frequency"} -{"id": 43789, "token": "\u0120Tay", "merges": "\u0120T ay", "count": 617, "type": "remove by frequency"} -{"id": 29897, "token": "\u0120Peterson", "merges": "\u0120Peters on", "count": 618, "type": "remove by frequency"} -{"id": 45246, "token": "\u0120Nielsen", "merges": "\u0120N ielsen", "count": 618, "type": "remove by frequency"} -{"id": 37611, "token": "\u0120Proced", "merges": "\u0120Pro ced", "count": 618, "type": "remove by frequency"} -{"id": 43082, "token": "\u0120Fluor", "merges": "\u0120Flu or", "count": 618, "type": "remove by frequency"} -{"id": 41888, "token": "\u0120Ernst", "merges": "\u0120Ern st", "count": 618, "type": "remove by frequency"} -{"id": 45744, "token": "\u0120Odys", "merges": "\u0120Od ys", "count": 618, "type": "remove by frequency"} -{"id": 45817, "token": "\u0120Oriental", "merges": "\u0120Ori ental", "count": 619, "type": "remove by frequency"} -{"id": 32791, "token": "\u0120ocular", "merges": "\u0120o cular", "count": 619, "type": "remove by frequency"} -{"id": 35203, "token": "\u0120Tas", "merges": "\u0120T as", "count": 619, "type": "remove by frequency"} -{"id": 40751, "token": "\u0120Basketball", "merges": "\u0120B asketball", "count": 620, "type": "remove by frequency"} -{"id": 31897, "token": "\u0120Lopez", "merges": "\u0120L opez", "count": 620, "type": "remove by frequency"} -{"id": 25326, "token": "\u00ce\u00bb\u00ce\u00bf", "merges": "\u00ce\u00bb \u00ce\u00bf", "count": 620, "type": "remove by frequency"} -{"id": 30246, "token": "\u0120isoforms", "merges": "\u0120iso forms", "count": 621, "type": "remove by frequency"} -{"id": 40177, "token": "\u0120Witness", "merges": "\u0120W itness", "count": 621, "type": "remove by frequency"} -{"id": 31049, "token": "\u0120Solomon", "merges": "\u0120Sol omon", "count": 621, "type": "remove by frequency"} -{"id": 26516, "token": "\u0120epigen", "merges": "\u0120ep igen", "count": 621, "type": "remove by frequency"} -{"id": 49315, "token": "\u0120dalla", "merges": "\u0120d alla", "count": 621, "type": "remove by frequency"} -{"id": 49893, "token": "\u0120Indy", "merges": "\u0120Ind y", "count": 621, "type": "remove by frequency"} -{"id": 31375, "token": "\u0120dissociation", "merges": "\u0120diss ociation", "count": 622, "type": "remove by frequency"} -{"id": 41531, "token": "\u0120rebounds", "merges": "\u0120reb ounds", "count": 622, "type": "remove by frequency"} -{"id": 6824, "token": "\u0120});", "merges": "\u0120} );", "count": 622, "type": "remove by frequency"} -{"id": 24045, "token": "\u0120hypothesized", "merges": "\u0120hypothes ized", "count": 623, "type": "remove by frequency"} -{"id": 22810, "token": "\u0120Johnny", "merges": "\u0120John ny", "count": 623, "type": "remove by frequency"} -{"id": 50011, "token": "\u00d8\u00ba", "merges": "\u00d8 \u00ba", "count": 623, "type": "remove by frequency"} -{"id": 40149, "token": "\u0120reasonableness", "merges": "\u0120reason ableness", "count": 624, "type": "remove by frequency"} -{"id": 42002, "token": "\u0120proximate", "merges": "\u0120pro ximate", "count": 624, "type": "remove by frequency"} -{"id": 36051, "token": "\u0120Knights", "merges": "\u0120Kn ights", "count": 624, "type": "remove by frequency"} -{"id": 43602, "token": "\u0120Berger", "merges": "\u0120Ber ger", "count": 624, "type": "remove by frequency"} -{"id": 42265, "token": "\u0120Curry", "merges": "\u0120Cur ry", "count": 624, "type": "remove by frequency"} -{"id": 44477, "token": "\u0120Pione", "merges": "\u0120P ione", "count": 624, "type": "remove by frequency"} -{"id": 45861, "token": "\u0120bony", "merges": "\u0120b ony", "count": 624, "type": "remove by frequency"} -{"id": 48057, "token": "ouden", "merges": "oud en", "count": 624, "type": "remove by frequency"} -{"id": 48493, "token": "\u0120Cres", "merges": "\u0120C res", "count": 624, "type": "remove by frequency"} -{"id": 50191, "token": "\u0120Pep", "merges": "\u0120P ep", "count": 625, "type": "remove by frequency"} -{"id": 47528, "token": "\u0120STRICT", "merges": "\u0120STR ICT", "count": 626, "type": "remove by frequency"} -{"id": 31508, "token": "\u0120Bever", "merges": "\u0120B ever", "count": 626, "type": "remove by frequency"} -{"id": 18184, "token": "\u00ce\u00af\u00ce\u00b1", "merges": "\u00ce\u00af \u00ce\u00b1", "count": 626, "type": "remove by frequency"} -{"id": 45147, "token": "\u0120SFR", "merges": "\u0120S FR", "count": 626, "type": "remove by frequency"} -{"id": 48735, "token": "\u0120Wedding", "merges": "\u0120Wed ding", "count": 627, "type": "remove by frequency"} -{"id": 41675, "token": "\u0120Shim", "merges": "\u0120Sh im", "count": 627, "type": "remove by frequency"} -{"id": 27438, "token": "\u0120Montgomery", "merges": "\u0120Mont gomery", "count": 628, "type": "remove by frequency"} -{"id": 42113, "token": "\u0120BUSINESS", "merges": "\u0120BUS INESS", "count": 628, "type": "remove by frequency"} -{"id": 26250, "token": "\u0120Springs", "merges": "\u0120Spr ings", "count": 628, "type": "remove by frequency"} -{"id": 52070, "token": "\u00e6\u0137\u0130", "merges": "\u00e6\u0137 \u0130", "count": 628, "type": "remove by frequency"} -{"id": 23340, "token": "\u0120Indonesia", "merges": "\u0120Indones ia", "count": 629, "type": "remove by frequency"} -{"id": 47907, "token": "\u0120Braun", "merges": "\u0120Bra un", "count": 629, "type": "remove by frequency"} -{"id": 45626, "token": "\u0120Exped", "merges": "\u0120Ex ped", "count": 629, "type": "remove by frequency"} -{"id": 10857, "token": "\u0120phosphory", "merges": "\u0120phosph ory", "count": 630, "type": "remove by frequency"} -{"id": 43998, "token": "\u0120countable", "merges": "\u0120count able", "count": 630, "type": "remove by frequency"} -{"id": 40215, "token": "\u0120affective", "merges": "\u0120affect ive", "count": 630, "type": "remove by frequency"} -{"id": 39708, "token": "\u0120reflux", "merges": "\u0120re flux", "count": 630, "type": "remove by frequency"} -{"id": 36055, "token": "\u0120Alleg", "merges": "\u0120Al leg", "count": 630, "type": "remove by frequency"} -{"id": 26121, "token": "\u0120Nep", "merges": "\u0120N ep", "count": 630, "type": "remove by frequency"} -{"id": 39935, "token": "\u0120Progressive", "merges": "\u0120Pro gressive", "count": 631, "type": "remove by frequency"} -{"id": 24620, "token": "\u0120Guidelines", "merges": "\u0120Gu idelines", "count": 631, "type": "remove by frequency"} -{"id": 36218, "token": "\u0120penicillin", "merges": "\u0120pen icillin", "count": 631, "type": "remove by frequency"} -{"id": 50158, "token": "\u0120desarroll", "merges": "\u0120des arroll", "count": 631, "type": "remove by frequency"} -{"id": 37874, "token": "\u0120paraffin", "merges": "\u0120par affin", "count": 631, "type": "remove by frequency"} -{"id": 34855, "token": "\u0120Andrews", "merges": "\u0120Andrew s", "count": 631, "type": "remove by frequency"} -{"id": 31083, "token": "\u0120Prison", "merges": "\u0120Pr ison", "count": 631, "type": "remove by frequency"} -{"id": 26419, "token": "\u0120Chuck", "merges": "\u0120Ch uck", "count": 631, "type": "remove by frequency"} -{"id": 29336, "token": "\u0120Cann", "merges": "\u0120C ann", "count": 631, "type": "remove by frequency"} -{"id": 39205, "token": "\u0120Gut", "merges": "\u0120G ut", "count": 631, "type": "remove by frequency"} -{"id": 48627, "token": "\u0120Essentially", "merges": "\u0120Ess entially", "count": 632, "type": "remove by frequency"} -{"id": 46994, "token": "\u0120thickening", "merges": "\u0120thick ening", "count": 632, "type": "remove by frequency"} -{"id": 34841, "token": "\u0120Heights", "merges": "\u0120He ights", "count": 632, "type": "remove by frequency"} -{"id": 19917, "token": "\u0120Barbara", "merges": "\u0120Bar bara", "count": 632, "type": "remove by frequency"} -{"id": 50016, "token": "\u0120Tut", "merges": "\u0120T ut", "count": 632, "type": "remove by frequency"} -{"id": 28562, "token": "\u0120ook", "merges": "\u0120o ok", "count": 632, "type": "remove by frequency"} -{"id": 36023, "token": "\u0120foreclosure", "merges": "\u0120fore closure", "count": 633, "type": "remove by frequency"} -{"id": 46355, "token": "\u0120postpartum", "merges": "\u0120post partum", "count": 633, "type": "remove by frequency"} -{"id": 41348, "token": "oresist", "merges": "ores ist", "count": 633, "type": "remove by frequency"} -{"id": 48226, "token": "\u0120caries", "merges": "\u0120car ies", "count": 633, "type": "remove by frequency"} -{"id": 37792, "token": "\u0120Lung", "merges": "\u0120L ung", "count": 633, "type": "remove by frequency"} -{"id": 38867, "token": "\u0120TiO", "merges": "\u0120Ti O", "count": 633, "type": "remove by frequency"} -{"id": 48807, "token": "\u0120\u00e0\u00b4", "merges": "\u0120 \u00e0\u00b4", "count": 633, "type": "remove by frequency"} -{"id": 40721, "token": "\u0120Hubble", "merges": "\u0120Hub ble", "count": 634, "type": "remove by frequency"} -{"id": 39339, "token": "\u0120Cotton", "merges": "\u0120Cot ton", "count": 634, "type": "remove by frequency"} -{"id": 23204, "token": "\u0120\u00d0\u00b4\u00d0\u00b0", "merges": "\u0120\u00d0\u00b4 \u00d0\u00b0", "count": 634, "type": "remove by frequency"} -{"id": 25975, "token": "\u0120Emma", "merges": "\u0120Em ma", "count": 634, "type": "remove by frequency"} -{"id": 23011, "token": "\u0120PhD", "merges": "\u0120Ph D", "count": 634, "type": "remove by frequency"} -{"id": 38164, "token": "\u0120HIS", "merges": "\u0120H IS", "count": 634, "type": "remove by frequency"} -{"id": 50305, "token": "\u00e2\u0124\u0123", "merges": "\u00e2\u0124 \u0123", "count": 634, "type": "remove by frequency"} -{"id": 50314, "token": "\u00e2\u012a\u0122", "merges": "\u00e2\u012a \u0122", "count": 634, "type": "remove by frequency"} -{"id": 21235, "token": "\u0120Jerem", "merges": "\u0120Je rem", "count": 635, "type": "remove by frequency"} -{"id": 46897, "token": "juvant", "merges": "ju vant", "count": 635, "type": "remove by frequency"} -{"id": 21638, "token": "\u0120Terry", "merges": "\u0120T erry", "count": 635, "type": "remove by frequency"} -{"id": 46884, "token": "\u0120gibt", "merges": "\u0120gib t", "count": 635, "type": "remove by frequency"} -{"id": 50031, "token": "\u00c3\u00a1d", "merges": "\u00c3\u00a1 d", "count": 635, "type": "remove by frequency"} -{"id": 49113, "token": "\u00e1\u00be", "merges": "\u00e1 \u00be", "count": 635, "type": "remove by frequency"} -{"id": 35257, "token": "\u0120\u00d5", "merges": "\u0120 \u00d5", "count": 635, "type": "remove by frequency"} -{"id": 18856, "token": "\\;", "merges": "\\ ;", "count": 635, "type": "remove by frequency"} -{"id": 43674, "token": "\u0120sophomore", "merges": "\u0120soph omore", "count": 636, "type": "remove by frequency"} -{"id": 42797, "token": "\u0120vagina", "merges": "\u0120vag ina", "count": 636, "type": "remove by frequency"} -{"id": 45810, "token": "\u0120erotic", "merges": "\u0120er otic", "count": 636, "type": "remove by frequency"} -{"id": 39523, "token": "\u0120Pride", "merges": "\u0120Pr ide", "count": 636, "type": "remove by frequency"} -{"id": 48784, "token": "\u00e3\u0123\u00aa\u00e3\u0123\u0131", "merges": "\u00e3\u0123\u00aa \u00e3\u0123\u0131", "count": 636, "type": "remove by frequency"} -{"id": 45931, "token": "\u0120Vera", "merges": "\u0120V era", "count": 636, "type": "remove by frequency"} -{"id": 46632, "token": "\u0120THEM", "merges": "\u0120THE M", "count": 636, "type": "remove by frequency"} -{"id": 21380, "token": "v\u00c3\u00a4", "merges": "v \u00c3\u00a4", "count": 636, "type": "remove by frequency"} -{"id": 37349, "token": "\u0120Engineers", "merges": "\u0120Engine ers", "count": 637, "type": "remove by frequency"} -{"id": 39031, "token": "\u0120vertebral", "merges": "\u0120verte bral", "count": 637, "type": "remove by frequency"} -{"id": 45356, "token": "\u0120Observe", "merges": "\u0120Obs erve", "count": 637, "type": "remove by frequency"} -{"id": 44990, "token": "\u0120Hew", "merges": "\u0120He w", "count": 637, "type": "remove by frequency"} -{"id": 40109, "token": "\u0120h\u00c3\u00b6", "merges": "\u0120h \u00c3\u00b6", "count": 637, "type": "remove by frequency"} -{"id": 33005, "token": "\u0120Rac", "merges": "\u0120R ac", "count": 637, "type": "remove by frequency"} -{"id": 35146, "token": "\u0120CRP", "merges": "\u0120CR P", "count": 637, "type": "remove by frequency"} -{"id": 35290, "token": "\u0120Rum", "merges": "\u0120R um", "count": 637, "type": "remove by frequency"} -{"id": 42931, "token": "\u00c2\u00b5m", "merges": "\u00c2\u00b5 m", "count": 637, "type": "remove by frequency"} -{"id": 23041, "token": "\u0120Historic", "merges": "\u0120Histor ic", "count": 638, "type": "remove by frequency"} -{"id": 25188, "token": "\u0120worden", "merges": "\u0120word en", "count": 638, "type": "remove by frequency"} -{"id": 38020, "token": "\u0120Bend", "merges": "\u0120B end", "count": 638, "type": "remove by frequency"} -{"id": 42610, "token": "\u0120STEM", "merges": "\u0120ST EM", "count": 638, "type": "remove by frequency"} -{"id": 50359, "token": "\u00e2\u0138\u012b", "merges": "\u00e2\u0138 \u012b", "count": 638, "type": "remove by frequency"} -{"id": 41366, "token": "\u0120statewide", "merges": "\u0120state wide", "count": 639, "type": "remove by frequency"} -{"id": 34070, "token": "\u0120Tucker", "merges": "\u0120T ucker", "count": 639, "type": "remove by frequency"} -{"id": 45966, "token": "\u0120monop", "merges": "\u0120mon op", "count": 639, "type": "remove by frequency"} -{"id": 26243, "token": "\u0120n\u00c3\u00a4", "merges": "\u0120n \u00c3\u00a4", "count": 639, "type": "remove by frequency"} -{"id": 34310, "token": "\u0120overruled", "merges": "\u0120overr uled", "count": 640, "type": "remove by frequency"} -{"id": 45349, "token": "\u0120Fighting", "merges": "\u0120F ighting", "count": 640, "type": "remove by frequency"} -{"id": 42900, "token": "\u0120Trophy", "merges": "\u0120T rophy", "count": 640, "type": "remove by frequency"} -{"id": 28128, "token": "))**(-", "merges": ")) **(-", "count": 640, "type": "remove by frequency"} -{"id": 32543, "token": "\u0120Pray", "merges": "\u0120P ray", "count": 640, "type": "remove by frequency"} -{"id": 43624, "token": "\u0120Bret", "merges": "\u0120B ret", "count": 640, "type": "remove by frequency"} -{"id": 10968, "token": "\u0120Sov", "merges": "\u0120S ov", "count": 640, "type": "remove by frequency"} -{"id": 50293, "token": "\u00e2\u0122\u0138", "merges": "\u00e2\u0122 \u0138", "count": 640, "type": "remove by frequency"} -{"id": 19331, "token": "\u0120heterogeneity", "merges": "\u0120heter ogeneity", "count": 641, "type": "remove by frequency"} -{"id": 40755, "token": "\u0120automorphism", "merges": "\u0120autom orphism", "count": 641, "type": "remove by frequency"} -{"id": 43556, "token": "\u0120columnist", "merges": "\u0120column ist", "count": 641, "type": "remove by frequency"} -{"id": 36580, "token": "\u0120Angels", "merges": "\u0120Ang els", "count": 641, "type": "remove by frequency"} -{"id": 45165, "token": "\u0120Witch", "merges": "\u0120W itch", "count": 641, "type": "remove by frequency"} -{"id": 8755, "token": "\u0120Lemma", "merges": "\u0120Lem ma", "count": 641, "type": "remove by frequency"} -{"id": 45977, "token": "\u0120Fors", "merges": "\u0120F ors", "count": 641, "type": "remove by frequency"} -{"id": 49324, "token": "\u0120Vamp", "merges": "\u0120V amp", "count": 641, "type": "remove by frequency"} -{"id": 27830, "token": "\u0120p\u00c5\u013b", "merges": "\u0120p \u00c5\u013b", "count": 641, "type": "remove by frequency"} -{"id": 52939, "token": "\u00e7\u0139\u0142", "merges": "\u00e7\u0139 \u0142", "count": 641, "type": "remove by frequency"} -{"id": 33736, "token": "\u0120spectroscopic", "merges": "\u0120spectrosc opic", "count": 642, "type": "remove by frequency"} -{"id": 31046, "token": "\u0120eukary", "merges": "\u0120e ukary", "count": 642, "type": "remove by frequency"} -{"id": 28516, "token": "\u0120glomer", "merges": "\u0120gl omer", "count": 642, "type": "remove by frequency"} -{"id": 49121, "token": "\u0120Essays", "merges": "\u0120Ess ays", "count": 642, "type": "remove by frequency"} -{"id": 37089, "token": "\u0120Lips", "merges": "\u0120L ips", "count": 642, "type": "remove by frequency"} -{"id": 18573, "token": "alkyl", "merges": "alk yl", "count": 642, "type": "remove by frequency"} -{"id": 45631, "token": "\u0120\u00c3\u00a9v", "merges": "\u0120\u00c3\u00a9 v", "count": 642, "type": "remove by frequency"} -{"id": 21039, "token": "\u0120platelet", "merges": "\u0120plate let", "count": 643, "type": "remove by frequency"} -{"id": 40180, "token": "\u0120creamy", "merges": "\u0120cream y", "count": 643, "type": "remove by frequency"} -{"id": 39736, "token": "\u0120Dixon", "merges": "\u0120D ixon", "count": 643, "type": "remove by frequency"} -{"id": 21965, "token": "\u0120Soon", "merges": "\u0120So on", "count": 643, "type": "remove by frequency"} -{"id": 32585, "token": "TRODUCTION", "merges": "TR ODUCTION", "count": 644, "type": "remove by frequency"} -{"id": 49540, "token": "\u0120Prospect", "merges": "\u0120Pro spect", "count": 644, "type": "remove by frequency"} -{"id": 22671, "token": "\u0120Cameron", "merges": "\u0120Cam eron", "count": 644, "type": "remove by frequency"} -{"id": 29944, "token": "\u0120Hardy", "merges": "\u0120Hard y", "count": 644, "type": "remove by frequency"} -{"id": 47976, "token": "\u0120Gru", "merges": "\u0120G ru", "count": 644, "type": "remove by frequency"} -{"id": 25378, "token": "\u00d9\u012b", "merges": "\u00d9 \u012b", "count": 644, "type": "remove by frequency"} -{"id": 50283, "token": "\u00cb\u012a", "merges": "\u00cb \u012a", "count": 644, "type": "remove by frequency"} -{"id": 42849, "token": "\u0120Consortium", "merges": "\u0120Cons ortium", "count": 645, "type": "remove by frequency"} -{"id": 38618, "token": "newcommand", "merges": "new command", "count": 645, "type": "remove by frequency"} -{"id": 13533, "token": "\u0120Toronto", "merges": "\u0120Tor onto", "count": 645, "type": "remove by frequency"} -{"id": 22663, "token": "\u0120Harper", "merges": "\u0120Har per", "count": 645, "type": "remove by frequency"} -{"id": 29926, "token": "\u0120Griffin", "merges": "\u0120Griff in", "count": 646, "type": "remove by frequency"} -{"id": 47663, "token": "\u0120millig", "merges": "\u0120mill ig", "count": 646, "type": "remove by frequency"} -{"id": 50279, "token": "\u00c9\u0136", "merges": "\u00c9 \u0136", "count": 646, "type": "remove by frequency"} -{"id": 31199, "token": "\u0120Terror", "merges": "\u0120T error", "count": 647, "type": "remove by frequency"} -{"id": 23442, "token": "\u0120Helen", "merges": "\u0120Hel en", "count": 647, "type": "remove by frequency"} -{"id": 36044, "token": "\u0120Hunting", "merges": "\u0120Hun ting", "count": 648, "type": "remove by frequency"} -{"id": 36061, "token": "oplasmic", "merges": "oplasm ic", "count": 648, "type": "remove by frequency"} -{"id": 17793, "token": "\u0120hypox", "merges": "\u0120hyp ox", "count": 648, "type": "remove by frequency"} -{"id": 45858, "token": "\u0120Yosh", "merges": "\u0120Y osh", "count": 648, "type": "remove by frequency"} -{"id": 30598, "token": "\u00e0\u00a4\u00b9", "merges": "\u00e0\u00a4 \u00b9", "count": 648, "type": "remove by frequency"} -{"id": 27331, "token": "\u0120pharmacological", "merges": "\u0120pharmac ological", "count": 649, "type": "remove by frequency"} -{"id": 31639, "token": "\u0120Interior", "merges": "\u0120Inter ior", "count": 649, "type": "remove by frequency"} -{"id": 35463, "token": "\u0120Lance", "merges": "\u0120L ance", "count": 649, "type": "remove by frequency"} -{"id": 48366, "token": "\u0120Jail", "merges": "\u0120J ail", "count": 649, "type": "remove by frequency"} -{"id": 38943, "token": "}}|", "merges": "}} |", "count": 649, "type": "remove by frequency"} -{"id": 31298, "token": "\u0120Proceedings", "merges": "\u0120Pro ceedings", "count": 650, "type": "remove by frequency"} -{"id": 44339, "token": "\u0120Shirley", "merges": "\u0120Shir ley", "count": 650, "type": "remove by frequency"} -{"id": 45705, "token": "\u0120phenol", "merges": "\u0120phen ol", "count": 650, "type": "remove by frequency"} -{"id": 32826, "token": "\u0120Tall", "merges": "\u0120T all", "count": 650, "type": "remove by frequency"} -{"id": 47900, "token": "\u0120Oliv", "merges": "\u0120Ol iv", "count": 650, "type": "remove by frequency"} -{"id": 48905, "token": "\u0120Mush", "merges": "\u0120M ush", "count": 650, "type": "remove by frequency"} -{"id": 17152, "token": "\u0120Legisl", "merges": "\u0120Leg isl", "count": 651, "type": "remove by frequency"} -{"id": 43354, "token": "\u0120Bates", "merges": "\u0120B ates", "count": 651, "type": "remove by frequency"} -{"id": 17686, "token": "\u0120\u00cf\u0126\u00ce\u00bf", "merges": "\u0120\u00cf\u0126 \u00ce\u00bf", "count": 651, "type": "remove by frequency"} -{"id": 40938, "token": "\u0120Dogs", "merges": "\u0120D ogs", "count": 651, "type": "remove by frequency"} -{"id": 50310, "token": "\u00e2\u0126\u0138", "merges": "\u00e2\u0126 \u0138", "count": 651, "type": "remove by frequency"} -{"id": 50130, "token": "\u0120neutralizing", "merges": "\u0120neutral izing", "count": 652, "type": "remove by frequency"} -{"id": 33477, "token": "\u0120corneal", "merges": "\u0120corne al", "count": 652, "type": "remove by frequency"} -{"id": 49658, "token": "\u0120epistem", "merges": "\u0120epist em", "count": 652, "type": "remove by frequency"} -{"id": 24962, "token": "\u0120Oscar", "merges": "\u0120O scar", "count": 652, "type": "remove by frequency"} -{"id": 41425, "token": "\u0120Gamb", "merges": "\u0120G amb", "count": 652, "type": "remove by frequency"} -{"id": 39921, "token": "\u0120Contemporary", "merges": "\u0120Cont emporary", "count": 653, "type": "remove by frequency"} -{"id": 24324, "token": "\u0120Warner", "merges": "\u0120War ner", "count": 653, "type": "remove by frequency"} -{"id": 44101, "token": "stating", "merges": "st ating", "count": 653, "type": "remove by frequency"} -{"id": 38056, "token": "\u0120Surve", "merges": "\u0120Sur ve", "count": 653, "type": "remove by frequency"} -{"id": 27779, "token": "\u0120Herr", "merges": "\u0120H err", "count": 653, "type": "remove by frequency"} -{"id": 44367, "token": "\u0120EGF", "merges": "\u0120E GF", "count": 653, "type": "remove by frequency"} -{"id": 32432, "token": "ferentially", "merges": "ferent ially", "count": 654, "type": "remove by frequency"} -{"id": 35971, "token": "\u0120Chronicle", "merges": "\u0120Chron icle", "count": 654, "type": "remove by frequency"} -{"id": 40289, "token": "\u0120Doyle", "merges": "\u0120D oyle", "count": 654, "type": "remove by frequency"} -{"id": 29053, "token": "\u0120Burke", "merges": "\u0120Bur ke", "count": 654, "type": "remove by frequency"} -{"id": 35188, "token": "\u0120excitations", "merges": "\u0120excit ations", "count": 655, "type": "remove by frequency"} -{"id": 20904, "token": "\u0120Cape", "merges": "\u0120C ape", "count": 655, "type": "remove by frequency"} -{"id": 42789, "token": "})+", "merges": "}) +", "count": 655, "type": "remove by frequency"} -{"id": 34405, "token": "\u0120Robertson", "merges": "\u0120Roberts on", "count": 656, "type": "remove by frequency"} -{"id": 34929, "token": "\u0120lesbian", "merges": "\u0120les bian", "count": 656, "type": "remove by frequency"} -{"id": 48537, "token": "\u0120amplic", "merges": "\u0120am plic", "count": 656, "type": "remove by frequency"} -{"id": 44575, "token": "\u0120Maple", "merges": "\u0120Map le", "count": 656, "type": "remove by frequency"} -{"id": 47472, "token": "\u0120Fans", "merges": "\u0120F ans", "count": 656, "type": "remove by frequency"} -{"id": 48669, "token": "\u0120Tir", "merges": "\u0120T ir", "count": 656, "type": "remove by frequency"} -{"id": 29243, "token": "\u0120McL", "merges": "\u0120Mc L", "count": 656, "type": "remove by frequency"} -{"id": 12577, "token": "\\,\\", "merges": "\\ ,\\", "count": 656, "type": "remove by frequency"} -{"id": 40623, "token": "\u0120hydrodynamic", "merges": "\u0120hydro dynamic", "count": 657, "type": "remove by frequency"} -{"id": 47169, "token": "\u00e3\u0123\u00a6\u00e3\u0123\u0126\u00e3\u0123\u00be\u00e3\u0123\u013b", "merges": "\u00e3\u0123\u00a6\u00e3\u0123\u0126 \u00e3\u0123\u00be\u00e3\u0123\u013b", "count": 657, "type": "remove by frequency"} -{"id": 21519, "token": "\u0120Roosevelt", "merges": "\u0120Ro osevelt", "count": 657, "type": "remove by frequency"} -{"id": 31580, "token": "\u0120Walt", "merges": "\u0120W alt", "count": 657, "type": "remove by frequency"} -{"id": 7182, "token": ")=\\", "merges": ") =\\", "count": 657, "type": "remove by frequency"} -{"id": 47050, "token": "\u0120Werner", "merges": "\u0120Wer ner", "count": 658, "type": "remove by frequency"} -{"id": 36338, "token": "\u0120Drake", "merges": "\u0120Dra ke", "count": 658, "type": "remove by frequency"} -{"id": 31293, "token": "\u0120Understanding", "merges": "\u0120Under standing", "count": 659, "type": "remove by frequency"} -{"id": 48845, "token": "\u0120libertarian", "merges": "\u0120libert arian", "count": 659, "type": "remove by frequency"} -{"id": 43359, "token": "\u0120covariant", "merges": "\u0120cov ariant", "count": 659, "type": "remove by frequency"} -{"id": 20518, "token": "\u0120miRNA", "merges": "\u0120mi RNA", "count": 659, "type": "remove by frequency"} -{"id": 40905, "token": "\u0120Booth", "merges": "\u0120Bo oth", "count": 659, "type": "remove by frequency"} -{"id": 36146, "token": "\u0120muito", "merges": "\u0120muit o", "count": 659, "type": "remove by frequency"} -{"id": 45843, "token": "\u0120$@", "merges": "\u0120$ @", "count": 659, "type": "remove by frequency"} -{"id": 32907, "token": "\u0120Investigation", "merges": "\u0120Invest igation", "count": 660, "type": "remove by frequency"} -{"id": 37804, "token": "\u0120pituitary", "merges": "\u0120pit uitary", "count": 660, "type": "remove by frequency"} -{"id": 32958, "token": "\u0120Carroll", "merges": "\u0120Car roll", "count": 660, "type": "remove by frequency"} -{"id": 39285, "token": "\u0120Thr", "merges": "\u0120Th r", "count": 660, "type": "remove by frequency"} -{"id": 47187, "token": "\u0120willful", "merges": "\u0120will ful", "count": 661, "type": "remove by frequency"} -{"id": 15236, "token": "\u0120biomark", "merges": "\u0120biom ark", "count": 662, "type": "remove by frequency"} -{"id": 41119, "token": "\u0120nitride", "merges": "\u0120nit ride", "count": 662, "type": "remove by frequency"} -{"id": 22723, "token": "\u0120Sean", "merges": "\u0120Se an", "count": 662, "type": "remove by frequency"} -{"id": 39133, "token": "_);", "merges": "_ );", "count": 662, "type": "remove by frequency"} -{"id": 35299, "token": "\u0120laparoscopic", "merges": "\u0120lapar oscopic", "count": 663, "type": "remove by frequency"} -{"id": 46068, "token": "SEQUENTIAL", "merges": "SEQU ENTIAL", "count": 663, "type": "remove by frequency"} -{"id": 38202, "token": "\u0120reticul", "merges": "\u0120ret icul", "count": 663, "type": "remove by frequency"} -{"id": 15732, "token": "\u0120Vietnam", "merges": "\u0120Viet nam", "count": 663, "type": "remove by frequency"} -{"id": 48092, "token": "\u0120siempre", "merges": "\u0120si empre", "count": 663, "type": "remove by frequency"} -{"id": 34597, "token": "\u0120raped", "merges": "\u0120ra ped", "count": 663, "type": "remove by frequency"} -{"id": 18654, "token": "\u0120auch", "merges": "\u0120a uch", "count": 663, "type": "remove by frequency"} -{"id": 21027, "token": "\u00ce\u00bd\u00ce\u00bf", "merges": "\u00ce\u00bd \u00ce\u00bf", "count": 663, "type": "remove by frequency"} -{"id": 48756, "token": "\u00e3\u0123\u0134", "merges": "\u00e3\u0123 \u0134", "count": 663, "type": "remove by frequency"} -{"id": 48603, "token": "culoskeletal", "merges": "culos keletal", "count": 664, "type": "remove by frequency"} -{"id": 11545, "token": "\u0120Illinois", "merges": "\u0120Ill inois", "count": 664, "type": "remove by frequency"} -{"id": 18618, "token": "\u0120Malays", "merges": "\u0120Mal ays", "count": 664, "type": "remove by frequency"} -{"id": 44150, "token": "\u0120Rescue", "merges": "\u0120Res cue", "count": 664, "type": "remove by frequency"} -{"id": 41508, "token": "rogenic", "merges": "rogen ic", "count": 664, "type": "remove by frequency"} -{"id": 46574, "token": "\u0120Postal", "merges": "\u0120Post al", "count": 664, "type": "remove by frequency"} -{"id": 43504, "token": "\u0120Ideas", "merges": "\u0120Ide as", "count": 664, "type": "remove by frequency"} -{"id": 45309, "token": "\u0120Ricci", "merges": "\u0120Ric ci", "count": 664, "type": "remove by frequency"} -{"id": 49368, "token": "\u0120Audi", "merges": "\u0120Aud i", "count": 664, "type": "remove by frequency"} -{"id": 48535, "token": "\u0120Vall", "merges": "\u0120V all", "count": 664, "type": "remove by frequency"} -{"id": 45432, "token": "\u0120goddamn", "merges": "\u0120god damn", "count": 665, "type": "remove by frequency"} -{"id": 48248, "token": "\u0120Edison", "merges": "\u0120Ed ison", "count": 665, "type": "remove by frequency"} -{"id": 34599, "token": "\u0120Univ", "merges": "\u0120Un iv", "count": 666, "type": "remove by frequency"} -{"id": 40098, "token": "\u0120Auf", "merges": "\u0120A uf", "count": 666, "type": "remove by frequency"} -{"id": 46815, "token": "\u0120Sug", "merges": "\u0120S ug", "count": 666, "type": "remove by frequency"} -{"id": 18945, "token": "\u0120LPS", "merges": "\u0120L PS", "count": 666, "type": "remove by frequency"} -{"id": 17361, "token": "\u0120Sydney", "merges": "\u0120Syd ney", "count": 667, "type": "remove by frequency"} -{"id": 41004, "token": "\u0120PEOPLE", "merges": "\u0120PE OPLE", "count": 667, "type": "remove by frequency"} -{"id": 36496, "token": "\u0120IRA", "merges": "\u0120I RA", "count": 667, "type": "remove by frequency"} -{"id": 34764, "token": "\\@", "merges": "\\ @", "count": 667, "type": "remove by frequency"} -{"id": 26953, "token": "\u0120Tyler", "merges": "\u0120T yler", "count": 668, "type": "remove by frequency"} -{"id": 32354, "token": "\u0120Worth", "merges": "\u0120W orth", "count": 668, "type": "remove by frequency"} -{"id": 41186, "token": "\u0120Eva", "merges": "\u0120Ev a", "count": 668, "type": "remove by frequency"} -{"id": 16333, "token": "\u0120Netherlands", "merges": "\u0120Nether lands", "count": 669, "type": "remove by frequency"} -{"id": 42661, "token": "\u0120detectives", "merges": "\u0120detect ives", "count": 669, "type": "remove by frequency"} -{"id": 27051, "token": "\u0120Rodriguez", "merges": "\u0120Rodrig uez", "count": 669, "type": "remove by frequency"} -{"id": 34013, "token": "\u0120Acts", "merges": "\u0120Act s", "count": 669, "type": "remove by frequency"} -{"id": 36894, "token": "\u00d7\u0137\u00d7\u00aa", "merges": "\u00d7\u0137\u00d7 \u00aa", "count": 669, "type": "remove by frequency"} -{"id": 33389, "token": "\u0120\u00d7\u0140", "merges": "\u0120\u00d7 \u0140", "count": 669, "type": "remove by frequency"} -{"id": 22385, "token": "\u0120quarterback", "merges": "\u0120quarter back", "count": 670, "type": "remove by frequency"} -{"id": 19425, "token": "\u0120Premier", "merges": "\u0120Prem ier", "count": 670, "type": "remove by frequency"} -{"id": 48649, "token": "\u0120spiked", "merges": "\u0120sp iked", "count": 670, "type": "remove by frequency"} -{"id": 41264, "token": "\u0120Hers", "merges": "\u0120H ers", "count": 670, "type": "remove by frequency"} -{"id": 45377, "token": "\u0120Carey", "merges": "\u0120Care y", "count": 671, "type": "remove by frequency"} -{"id": 33087, "token": "\u0120Conservation", "merges": "\u0120Conserv ation", "count": 672, "type": "remove by frequency"} -{"id": 23236, "token": "\u0120footballers", "merges": "\u0120football ers", "count": 672, "type": "remove by frequency"} -{"id": 34324, "token": "\u0120positivity", "merges": "\u0120pos itivity", "count": 672, "type": "remove by frequency"} -{"id": 44402, "token": "\u0120Julius", "merges": "\u0120Jul ius", "count": 672, "type": "remove by frequency"} -{"id": 38725, "token": "\u0120ABA", "merges": "\u0120A BA", "count": 672, "type": "remove by frequency"} -{"id": 46799, "token": "\u0120Hut", "merges": "\u0120H ut", "count": 672, "type": "remove by frequency"} -{"id": 16732, "token": "\u0120Sanders", "merges": "\u0120Sand ers", "count": 673, "type": "remove by frequency"} -{"id": 44353, "token": "\u00d9\u012c\u00d8\u00b1", "merges": "\u00d9\u012c \u00d8\u00b1", "count": 673, "type": "remove by frequency"} -{"id": 42510, "token": "\u0120haplotypes", "merges": "\u0120hapl otypes", "count": 674, "type": "remove by frequency"} -{"id": 32717, "token": "\u0120suggestive", "merges": "\u0120suggest ive", "count": 674, "type": "remove by frequency"} -{"id": 25391, "token": "iferase", "merges": "ifer ase", "count": 674, "type": "remove by frequency"} -{"id": 36172, "token": "\u0120Felix", "merges": "\u0120Fel ix", "count": 674, "type": "remove by frequency"} -{"id": 49552, "token": "\u0120Schwe", "merges": "\u0120Sch we", "count": 674, "type": "remove by frequency"} -{"id": 24010, "token": "\u0120Todd", "merges": "\u0120T odd", "count": 674, "type": "remove by frequency"} -{"id": 40915, "token": "\u0120Tue", "merges": "\u0120T ue", "count": 674, "type": "remove by frequency"} -{"id": 45173, "token": "\u0120Eck", "merges": "\u0120E ck", "count": 674, "type": "remove by frequency"} -{"id": 48381, "token": "\u0120dermal", "merges": "\u0120d ermal", "count": 675, "type": "remove by frequency"} -{"id": 41979, "token": "\u0120Pok\u00c3\u00a9mon", "merges": "\u0120Pok \u00c3\u00a9mon", "count": 676, "type": "remove by frequency"} -{"id": 38036, "token": "\u0120Fuj", "merges": "\u0120F uj", "count": 676, "type": "remove by frequency"} -{"id": 14620, "token": "\u0120\u00ce\u00bcm", "merges": "\u0120\u00ce\u00bc m", "count": 676, "type": "remove by frequency"} -{"id": 37167, "token": "\u0120\u00d7\u0133", "merges": "\u0120\u00d7 \u0133", "count": 676, "type": "remove by frequency"} -{"id": 35972, "token": "\u0120Sebastian", "merges": "\u0120Sebast ian", "count": 677, "type": "remove by frequency"} -{"id": 36943, "token": "\u0120Territory", "merges": "\u0120Ter ritory", "count": 677, "type": "remove by frequency"} -{"id": 11314, "token": "\u0120Michigan", "merges": "\u0120Mich igan", "count": 677, "type": "remove by frequency"} -{"id": 39261, "token": "\u0120epiderm", "merges": "\u0120epid erm", "count": 677, "type": "remove by frequency"} -{"id": 14067, "token": "Despite", "merges": "Des pite", "count": 677, "type": "remove by frequency"} -{"id": 45326, "token": "\u0120Annex", "merges": "\u0120An nex", "count": 677, "type": "remove by frequency"} -{"id": 32369, "token": "\u0120lumen", "merges": "\u0120lum en", "count": 677, "type": "remove by frequency"} -{"id": 47249, "token": "\u0120Feld", "merges": "\u0120F eld", "count": 677, "type": "remove by frequency"} -{"id": 36212, "token": "\u00c3\u00a4ng", "merges": "\u00c3\u00a4 ng", "count": 677, "type": "remove by frequency"} -{"id": 54607, "token": "\u00ef\u00bc\u012c", "merges": "\u00ef\u00bc \u012c", "count": 677, "type": "remove by frequency"} -{"id": 34004, "token": "^**", "merges": "^ **", "count": 677, "type": "remove by frequency"} -{"id": 15458, "token": "\u0120Sarah", "merges": "\u0120Sar ah", "count": 678, "type": "remove by frequency"} -{"id": 48360, "token": "\u0120eines", "merges": "\u0120e ines", "count": 678, "type": "remove by frequency"} -{"id": 33117, "token": "\u0120deze", "merges": "\u0120de ze", "count": 678, "type": "remove by frequency"} -{"id": 37404, "token": "\u0120GTP", "merges": "\u0120G TP", "count": 678, "type": "remove by frequency"} -{"id": 43379, "token": "\u0120autres", "merges": "\u0120aut res", "count": 679, "type": "remove by frequency"} -{"id": 35675, "token": "\u0120peric", "merges": "\u0120per ic", "count": 679, "type": "remove by frequency"} -{"id": 38921, "token": "\u0120cref", "merges": "\u0120c ref", "count": 679, "type": "remove by frequency"} -{"id": 20469, "token": "\u0120Dak", "merges": "\u0120D ak", "count": 679, "type": "remove by frequency"} -{"id": 15848, "token": "\u0120cytoplasm", "merges": "\u0120cyt oplasm", "count": 680, "type": "remove by frequency"} -{"id": 24336, "token": "\u0120Marcus", "merges": "\u0120Mar cus", "count": 680, "type": "remove by frequency"} -{"id": 20572, "token": "\u00c5\u00a5", "merges": "\u00c5 \u00a5", "count": 680, "type": "remove by frequency"} -{"id": 41867, "token": "\u0120Kaiser", "merges": "\u0120K aiser", "count": 681, "type": "remove by frequency"} -{"id": 21924, "token": "\u0120Madrid", "merges": "\u0120Mad rid", "count": 681, "type": "remove by frequency"} -{"id": 48065, "token": "\u0120favoring", "merges": "\u0120favor ing", "count": 682, "type": "remove by frequency"} -{"id": 27504, "token": "\u0120Sandy", "merges": "\u0120Sand y", "count": 682, "type": "remove by frequency"} -{"id": 39659, "token": "\u0120endl", "merges": "\u0120end l", "count": 682, "type": "remove by frequency"} -{"id": 8809, "token": "amss", "merges": "am ss", "count": 682, "type": "remove by frequency"} -{"id": 43404, "token": "\u0120Assistance", "merges": "\u0120Ass istance", "count": 683, "type": "remove by frequency"} -{"id": 44583, "token": "\u0120depuis", "merges": "\u0120dep uis", "count": 683, "type": "remove by frequency"} -{"id": 33414, "token": "\u0120Pedro", "merges": "\u0120Ped ro", "count": 683, "type": "remove by frequency"} -{"id": 43061, "token": "\"});", "merges": "\"} );", "count": 683, "type": "remove by frequency"} -{"id": 39457, "token": "\u0120Barrett", "merges": "\u0120Bar rett", "count": 684, "type": "remove by frequency"} -{"id": 48802, "token": "\u0120Archer", "merges": "\u0120Ar cher", "count": 684, "type": "remove by frequency"} -{"id": 43700, "token": "\u0120etched", "merges": "\u0120et ched", "count": 684, "type": "remove by frequency"} -{"id": 38293, "token": "oclast", "merges": "oc last", "count": 684, "type": "remove by frequency"} -{"id": 45926, "token": "\u00c3\u00aam", "merges": "\u00c3\u00aa m", "count": 684, "type": "remove by frequency"} -{"id": 40797, "token": "\u0120apr\u00c3\u00a8s", "merges": "\u0120ap r\u00c3\u00a8s", "count": 685, "type": "remove by frequency"} -{"id": 32574, "token": "\u0120Burns", "merges": "\u0120B urns", "count": 685, "type": "remove by frequency"} -{"id": 44390, "token": "\u0120Hawk", "merges": "\u0120Haw k", "count": 685, "type": "remove by frequency"} -{"id": 40300, "token": "\u0120Ital", "merges": "\u0120It al", "count": 685, "type": "remove by frequency"} -{"id": 44816, "token": "\u0120Pon", "merges": "\u0120P on", "count": 685, "type": "remove by frequency"} -{"id": 43920, "token": "\u0120Chr", "merges": "\u0120Ch r", "count": 685, "type": "remove by frequency"} -{"id": 11637, "token": "\u0120Pennsylvania", "merges": "\u0120Pen nsylvania", "count": 686, "type": "remove by frequency"} -{"id": 33502, "token": "\u0120conjugated", "merges": "\u0120conjug ated", "count": 686, "type": "remove by frequency"} -{"id": 37108, "token": "\u0120Chapman", "merges": "\u0120Chap man", "count": 687, "type": "remove by frequency"} -{"id": 40445, "token": "\u0120Typical", "merges": "\u0120Typ ical", "count": 687, "type": "remove by frequency"} -{"id": 39223, "token": "\u0120tackles", "merges": "\u0120tack les", "count": 687, "type": "remove by frequency"} -{"id": 33903, "token": "\u0120adrenal", "merges": "\u0120adren al", "count": 687, "type": "remove by frequency"} -{"id": 32648, "token": "\u0120Treaty", "merges": "\u0120Treat y", "count": 687, "type": "remove by frequency"} -{"id": 33407, "token": "\u0120Dylan", "merges": "\u0120D ylan", "count": 687, "type": "remove by frequency"} -{"id": 48398, "token": "\u0120Kad", "merges": "\u0120K ad", "count": 687, "type": "remove by frequency"} -{"id": 27743, "token": "\u0120polype", "merges": "\u0120pol ype", "count": 688, "type": "remove by frequency"} -{"id": 44363, "token": "\u0120Wings", "merges": "\u0120W ings", "count": 688, "type": "remove by frequency"} -{"id": 15234, "token": "\u00cf\u012b\u00ce\u00bd", "merges": "\u00cf\u012b \u00ce\u00bd", "count": 688, "type": "remove by frequency"} -{"id": 28544, "token": "\u00c3\u00b6s", "merges": "\u00c3\u00b6 s", "count": 688, "type": "remove by frequency"} -{"id": 50382, "token": "\u00e2\u013b\u00a1", "merges": "\u00e2\u013b \u00a1", "count": 688, "type": "remove by frequency"} -{"id": 36949, "token": "\u0120_\"", "merges": "\u0120_ \"", "count": 688, "type": "remove by frequency"} -{"id": 42597, "token": "\u0120Fraser", "merges": "\u0120Fr aser", "count": 689, "type": "remove by frequency"} -{"id": 44289, "token": "opolys", "merges": "opol ys", "count": 689, "type": "remove by frequency"} -{"id": 45693, "token": "\u00d8\u00b8", "merges": "\u00d8 \u00b8", "count": 689, "type": "remove by frequency"} -{"id": 27975, "token": "\u0120remodeling", "merges": "\u0120remod eling", "count": 690, "type": "remove by frequency"} -{"id": 34818, "token": "\u0120serotonin", "merges": "\u0120seroton in", "count": 690, "type": "remove by frequency"} -{"id": 20782, "token": "---|---", "merges": "---| ---", "count": 690, "type": "remove by frequency"} -{"id": 5143, "token": "\u0120Israel", "merges": "\u0120Is rael", "count": 690, "type": "remove by frequency"} -{"id": 33301, "token": "--**", "merges": "-- **", "count": 690, "type": "remove by frequency"} -{"id": 45812, "token": "\u0120Cic", "merges": "\u0120C ic", "count": 690, "type": "remove by frequency"} -{"id": 33514, "token": "\u0120Gur", "merges": "\u0120G ur", "count": 690, "type": "remove by frequency"} -{"id": 49272, "token": "\u0120Cellular", "merges": "\u0120C ellular", "count": 691, "type": "remove by frequency"} -{"id": 26876, "token": "\u0120Nicholas", "merges": "\u0120Nich olas", "count": 691, "type": "remove by frequency"} -{"id": 38918, "token": "\u0120Wheeler", "merges": "\u0120Whe eler", "count": 691, "type": "remove by frequency"} -{"id": 40912, "token": "\u00e0\u00a7\u0129", "merges": "\u00e0\u00a7 \u0129", "count": 691, "type": "remove by frequency"} -{"id": 44342, "token": "\u0120", "merges": "=\"# \">", "count": 752, "type": "remove by frequency"} -{"id": 41853, "token": "\u0120Immediately", "merges": "\u0120Im mediately", "count": 753, "type": "remove by frequency"} -{"id": 46312, "token": "\u0120Nicolas", "merges": "\u0120Nic olas", "count": 753, "type": "remove by frequency"} -{"id": 13227, "token": "\u0120mitochondrial", "merges": "\u0120mitochond rial", "count": 754, "type": "remove by frequency"} -{"id": 48341, "token": "\u0120immunized", "merges": "\u0120immun ized", "count": 754, "type": "remove by frequency"} -{"id": 32211, "token": "\u0120Kurt", "merges": "\u0120K urt", "count": 754, "type": "remove by frequency"} -{"id": 39407, "token": "\u0120Wid", "merges": "\u0120W id", "count": 754, "type": "remove by frequency"} -{"id": 39319, "token": "\u0120Ara", "merges": "\u0120A ra", "count": 754, "type": "remove by frequency"} -{"id": 27439, "token": "\u0120granul", "merges": "\u0120gran ul", "count": 755, "type": "remove by frequency"} -{"id": 35129, "token": "\u0120essere", "merges": "\u0120esse re", "count": 755, "type": "remove by frequency"} -{"id": 26378, "token": "\u0120Ralph", "merges": "\u0120R alph", "count": 755, "type": "remove by frequency"} -{"id": 45498, "token": "\u0120dext", "merges": "\u0120de xt", "count": 755, "type": "remove by frequency"} -{"id": 43963, "token": "Disclaimer", "merges": "Dis claimer", "count": 756, "type": "remove by frequency"} -{"id": 49994, "token": "\u0120growled", "merges": "\u0120grow led", "count": 756, "type": "remove by frequency"} -{"id": 47363, "token": "\u0120keine", "merges": "\u0120ke ine", "count": 756, "type": "remove by frequency"} -{"id": 19466, "token": "\u0120Bowl", "merges": "\u0120Bow l", "count": 756, "type": "remove by frequency"} -{"id": 34997, "token": "\u0120skal", "merges": "\u0120sk al", "count": 756, "type": "remove by frequency"} -{"id": 17415, "token": "\u0120recombinant", "merges": "\u0120recom binant", "count": 757, "type": "remove by frequency"} -{"id": 20009, "token": "\u0120putative", "merges": "\u0120put ative", "count": 757, "type": "remove by frequency"} -{"id": 11918, "token": "\u0120Captain", "merges": "\u0120Capt ain", "count": 757, "type": "remove by frequency"} -{"id": 26686, "token": "\u0120Kant", "merges": "\u0120K ant", "count": 757, "type": "remove by frequency"} -{"id": 43918, "token": "\u0120fMRI", "merges": "\u0120f MRI", "count": 757, "type": "remove by frequency"} -{"id": 39174, "token": "\u0120suspensions", "merges": "\u0120susp ensions", "count": 758, "type": "remove by frequency"} -{"id": 37629, "token": "\u0120Traditional", "merges": "\u0120Tr aditional", "count": 758, "type": "remove by frequency"} -{"id": 34136, "token": "\u0120Advisory", "merges": "\u0120Ad visory", "count": 758, "type": "remove by frequency"} -{"id": 38315, "token": "\u0120Aviation", "merges": "\u0120Av iation", "count": 758, "type": "remove by frequency"} -{"id": 29947, "token": "tochrome", "merges": "to chrome", "count": 758, "type": "remove by frequency"} -{"id": 34909, "token": "\u0120Norton", "merges": "\u0120Nort on", "count": 758, "type": "remove by frequency"} -{"id": 45998, "token": "\u0120Hog", "merges": "\u0120H og", "count": 758, "type": "remove by frequency"} -{"id": 32524, "token": "\u0120Roh", "merges": "\u0120R oh", "count": 758, "type": "remove by frequency"} -{"id": 50137, "token": "\u0120linearity", "merges": "\u0120linear ity", "count": 759, "type": "remove by frequency"} -{"id": 12090, "token": "\u0120Several", "merges": "\u0120S everal", "count": 759, "type": "remove by frequency"} -{"id": 20703, "token": "osexual", "merges": "osex ual", "count": 759, "type": "remove by frequency"} -{"id": 48732, "token": "\u0120galact", "merges": "\u0120gal act", "count": 759, "type": "remove by frequency"} -{"id": 47785, "token": "ophores", "merges": "op hores", "count": 759, "type": "remove by frequency"} -{"id": 32005, "token": "\u0120Fear", "merges": "\u0120F ear", "count": 759, "type": "remove by frequency"} -{"id": 36244, "token": "\u0120Prix", "merges": "\u0120P rix", "count": 759, "type": "remove by frequency"} -{"id": 28252, "token": "\u0120\u00c5\u00bc", "merges": "\u0120\u00c5 \u00bc", "count": 759, "type": "remove by frequency"} -{"id": 36740, "token": "\u0120Democracy", "merges": "\u0120Democr acy", "count": 760, "type": "remove by frequency"} -{"id": 45149, "token": "\u0120resonator", "merges": "\u0120reson ator", "count": 760, "type": "remove by frequency"} -{"id": 34147, "token": "\u0120Dies", "merges": "\u0120D ies", "count": 760, "type": "remove by frequency"} -{"id": 35110, "token": "\u0120Certain", "merges": "\u0120C ertain", "count": 761, "type": "remove by frequency"} -{"id": 46474, "token": "\u0120NAC", "merges": "\u0120N AC", "count": 761, "type": "remove by frequency"} -{"id": 34254, "token": "\u0120neurotrans", "merges": "\u0120neuro trans", "count": 762, "type": "remove by frequency"} -{"id": 32831, "token": "\u0120Defence", "merges": "\u0120Def ence", "count": 762, "type": "remove by frequency"} -{"id": 21482, "token": "\u0120Ethics", "merges": "\u0120Eth ics", "count": 762, "type": "remove by frequency"} -{"id": 43253, "token": "\u0120lobes", "merges": "\u0120lob es", "count": 762, "type": "remove by frequency"} -{"id": 42224, "token": "\u0120Beau", "merges": "\u0120Be au", "count": 762, "type": "remove by frequency"} -{"id": 50398, "token": "\u00e3\u0122\u0139", "merges": "\u00e3\u0122 \u0139", "count": 762, "type": "remove by frequency"} -{"id": 45436, "token": "\u0120filmmaker", "merges": "\u0120filmm aker", "count": 763, "type": "remove by frequency"} -{"id": 37047, "token": "\u0120Scholars", "merges": "\u0120Sch olars", "count": 763, "type": "remove by frequency"} -{"id": 47313, "token": "\u0120Conway", "merges": "\u0120Con way", "count": 763, "type": "remove by frequency"} -{"id": 41446, "token": "Syscall", "merges": "S yscall", "count": 763, "type": "remove by frequency"} -{"id": 44345, "token": "\u0120ahora", "merges": "\u0120a hora", "count": 763, "type": "remove by frequency"} -{"id": 39749, "token": "\u0120Athletic", "merges": "\u0120Athlet ic", "count": 764, "type": "remove by frequency"} -{"id": 41370, "token": "Footnote", "merges": "Foot note", "count": 764, "type": "remove by frequency"} -{"id": 36063, "token": "\u0120Jacques", "merges": "\u0120Jac ques", "count": 764, "type": "remove by frequency"} -{"id": 35503, "token": "\u0120Rosa", "merges": "\u0120R osa", "count": 764, "type": "remove by frequency"} -{"id": 50397, "token": "\u00e3\u0122\u0138", "merges": "\u00e3\u0122 \u0138", "count": 764, "type": "remove by frequency"} -{"id": 41338, "token": "z\u00c4\u0127", "merges": "z \u00c4\u0127", "count": 765, "type": "remove by frequency"} -{"id": 39406, "token": "\u0120equivalently", "merges": "\u0120equival ently", "count": 766, "type": "remove by frequency"} -{"id": 49589, "token": "\u0120Turning", "merges": "\u0120Turn ing", "count": 766, "type": "remove by frequency"} -{"id": 47492, "token": "\u0120Innov", "merges": "\u0120Inn ov", "count": 766, "type": "remove by frequency"} -{"id": 15902, "token": "\u0120\u00ce\u00bd", "merges": "\u0120\u00ce \u00bd", "count": 766, "type": "remove by frequency"} -{"id": 24389, "token": "\u0120hybridization", "merges": "\u0120hybrid ization", "count": 767, "type": "remove by frequency"} -{"id": 18850, "token": "\u0120tablespoons", "merges": "\u0120tables poons", "count": 767, "type": "remove by frequency"} -{"id": 47273, "token": "\u0120propriety", "merges": "\u0120propri ety", "count": 767, "type": "remove by frequency"} -{"id": 26853, "token": "\u0120Bennett", "merges": "\u0120Benn ett", "count": 767, "type": "remove by frequency"} -{"id": 34167, "token": "\u0120Weiss", "merges": "\u0120We iss", "count": 767, "type": "remove by frequency"} -{"id": 9831, "token": "\u0120mM", "merges": "\u0120m M", "count": 767, "type": "remove by frequency"} -{"id": 48837, "token": "\u0120Spearman", "merges": "\u0120Spe arman", "count": 768, "type": "remove by frequency"} -{"id": 18457, "token": "\u0120Indones", "merges": "\u0120Ind ones", "count": 768, "type": "remove by frequency"} -{"id": 41327, "token": "\u0120Poinc", "merges": "\u0120Po inc", "count": 768, "type": "remove by frequency"} -{"id": 47445, "token": "\u0120Welch", "merges": "\u0120Wel ch", "count": 768, "type": "remove by frequency"} -{"id": 15473, "token": "\u0120IgG", "merges": "\u0120Ig G", "count": 768, "type": "remove by frequency"} -{"id": 49577, "token": "\u0120Hmm", "merges": "\u0120H mm", "count": 768, "type": "remove by frequency"} -{"id": 48214, "token": "\u00c3\u00a9v", "merges": "\u00c3\u00a9 v", "count": 768, "type": "remove by frequency"} -{"id": 28421, "token": "\u0120dehydrogenase", "merges": "\u0120dehydrogen ase", "count": 769, "type": "remove by frequency"} -{"id": 46266, "token": "Salmonella", "merges": "Sal monella", "count": 769, "type": "remove by frequency"} -{"id": 42778, "token": "\u0120Penny", "merges": "\u0120P enny", "count": 769, "type": "remove by frequency"} -{"id": 30835, "token": "\u0120Appl", "merges": "\u0120App l", "count": 769, "type": "remove by frequency"} -{"id": 43006, "token": "\u0120Excellence", "merges": "\u0120Excell ence", "count": 770, "type": "remove by frequency"} -{"id": 38813, "token": "\u0120aneurysm", "merges": "\u0120aneurys m", "count": 770, "type": "remove by frequency"} -{"id": 7670, "token": "\u0120Jesus", "merges": "\u0120Jes us", "count": 770, "type": "remove by frequency"} -{"id": 34806, "token": "\u0120Elder", "merges": "\u0120Eld er", "count": 772, "type": "remove by frequency"} -{"id": 47489, "token": "\u0120mucus", "merges": "\u0120muc us", "count": 772, "type": "remove by frequency"} -{"id": 38397, "token": "\u0120Schwarz", "merges": "\u0120Sch warz", "count": 773, "type": "remove by frequency"} -{"id": 37476, "token": "butyl", "merges": "but yl", "count": 773, "type": "remove by frequency"} -{"id": 31461, "token": "\u0120\u00d9\u0127\u00d9\u0128", "merges": "\u0120\u00d9\u0127 \u00d9\u0128", "count": 773, "type": "remove by frequency"} -{"id": 27614, "token": "\u0120Rah", "merges": "\u0120R ah", "count": 773, "type": "remove by frequency"} -{"id": 21161, "token": "})^", "merges": "}) ^", "count": 773, "type": "remove by frequency"} -{"id": 29271, "token": "\u0120Harold", "merges": "\u0120Har old", "count": 774, "type": "remove by frequency"} -{"id": 28712, "token": "\u0120Marco", "merges": "\u0120Mar co", "count": 774, "type": "remove by frequency"} -{"id": 40266, "token": "\u0120Infl", "merges": "\u0120In fl", "count": 774, "type": "remove by frequency"} -{"id": 29241, "token": "\u0120Duc", "merges": "\u0120D uc", "count": 774, "type": "remove by frequency"} -{"id": 35605, "token": "\u0120biologically", "merges": "\u0120bi ologically", "count": 775, "type": "remove by frequency"} -{"id": 32271, "token": "\u0120Whenever", "merges": "\u0120Whe never", "count": 775, "type": "remove by frequency"} -{"id": 36071, "token": "emente", "merges": "ement e", "count": 775, "type": "remove by frequency"} -{"id": 41467, "token": "\u0120Basin", "merges": "\u0120Bas in", "count": 775, "type": "remove by frequency"} -{"id": 22143, "token": "\u00e0\u00a4\u0124", "merges": "\u00e0\u00a4 \u0124", "count": 775, "type": "remove by frequency"} -{"id": 26002, "token": "\u0120Costa", "merges": "\u0120Cost a", "count": 776, "type": "remove by frequency"} -{"id": 47353, "token": "\u0120grafting", "merges": "\u0120graft ing", "count": 777, "type": "remove by frequency"} -{"id": 40566, "token": "\u0120Kab", "merges": "\u0120K ab", "count": 777, "type": "remove by frequency"} -{"id": 43845, "token": "\u00d7\u00a4", "merges": "\u00d7 \u00a4", "count": 777, "type": "remove by frequency"} -{"id": 46687, "token": "insects", "merges": "in sects", "count": 778, "type": "remove by frequency"} -{"id": 25158, "token": "\u0120fungal", "merges": "\u0120fun gal", "count": 778, "type": "remove by frequency"} -{"id": 24940, "token": "\u0120Roche", "merges": "\u0120R oche", "count": 778, "type": "remove by frequency"} -{"id": 30904, "token": "\u0120Thorn", "merges": "\u0120Th orn", "count": 778, "type": "remove by frequency"} -{"id": 29884, "token": "\u0120Theod", "merges": "\u0120The od", "count": 778, "type": "remove by frequency"} -{"id": 40765, "token": "\u0120anion", "merges": "\u0120an ion", "count": 778, "type": "remove by frequency"} -{"id": 16739, "token": "\u0120Syd", "merges": "\u0120Sy d", "count": 778, "type": "remove by frequency"} -{"id": 18453, "token": "\u00c3\u00a4m", "merges": "\u00c3\u00a4 m", "count": 778, "type": "remove by frequency"} -{"id": 43480, "token": "\u00c5\u012a", "merges": "\u00c5 \u012a", "count": 778, "type": "remove by frequency"} -{"id": 19990, "token": "\u0120fibrosis", "merges": "\u0120fib rosis", "count": 779, "type": "remove by frequency"} -{"id": 43048, "token": "\u0120Teachers", "merges": "\u0120Te achers", "count": 779, "type": "remove by frequency"} -{"id": 44608, "token": "\u0120ortholog", "merges": "\u0120orth olog", "count": 779, "type": "remove by frequency"} -{"id": 33589, "token": "\u0120Centers", "merges": "\u0120Cent ers", "count": 779, "type": "remove by frequency"} -{"id": 31692, "token": "\u0120Summit", "merges": "\u0120Sum mit", "count": 779, "type": "remove by frequency"} -{"id": 40059, "token": "\u0120Santos", "merges": "\u0120Sant os", "count": 779, "type": "remove by frequency"} -{"id": 23259, "token": "\u0120Ian", "merges": "\u0120I an", "count": 779, "type": "remove by frequency"} -{"id": 22443, "token": "\u0120touchdown", "merges": "\u0120touch down", "count": 780, "type": "remove by frequency"} -{"id": 37770, "token": "\u0120Graduate", "merges": "\u0120Grad uate", "count": 780, "type": "remove by frequency"} -{"id": 48678, "token": "\u0120Believe", "merges": "\u0120Bel ieve", "count": 780, "type": "remove by frequency"} -{"id": 21265, "token": "\u0120Alber", "merges": "\u0120Al ber", "count": 780, "type": "remove by frequency"} -{"id": 40456, "token": "\u0120Strategic", "merges": "\u0120Strateg ic", "count": 781, "type": "remove by frequency"} -{"id": 31368, "token": "\u0120filaments", "merges": "\u0120fil aments", "count": 781, "type": "remove by frequency"} -{"id": 39219, "token": "\u0120Healthy", "merges": "\u0120Health y", "count": 781, "type": "remove by frequency"} -{"id": 49890, "token": "\u0120Lange", "merges": "\u0120L ange", "count": 781, "type": "remove by frequency"} -{"id": 32706, "token": "\u0120Sah", "merges": "\u0120S ah", "count": 781, "type": "remove by frequency"} -{"id": 29936, "token": "\u0120Raymond", "merges": "\u0120Ray mond", "count": 782, "type": "remove by frequency"} -{"id": 47087, "token": "\u0120guitars", "merges": "\u0120gu itars", "count": 782, "type": "remove by frequency"} -{"id": 18949, "token": "\u0120zijn", "merges": "\u0120z ijn", "count": 782, "type": "remove by frequency"} -{"id": 26153, "token": "\u0120Abb", "merges": "\u0120Ab b", "count": 782, "type": "remove by frequency"} -{"id": 16482, "token": "\u00e1\u00bf", "merges": "\u00e1 \u00bf", "count": 782, "type": "remove by frequency"} -{"id": 23451, "token": "\u0120tablespoon", "merges": "\u0120tables poon", "count": 783, "type": "remove by frequency"} -{"id": 22618, "token": "\u0120Phillips", "merges": "\u0120Phill ips", "count": 783, "type": "remove by frequency"} -{"id": 28373, "token": "\u0120Bald", "merges": "\u0120B ald", "count": 783, "type": "remove by frequency"} -{"id": 33050, "token": "\u0120Till", "merges": "\u0120T ill", "count": 783, "type": "remove by frequency"} -{"id": 37057, "token": "\u0120foci", "merges": "\u0120f oci", "count": 783, "type": "remove by frequency"} -{"id": 5789, "token": ")/(-", "merges": ")/ (-", "count": 783, "type": "remove by frequency"} -{"id": 38567, "token": "\u0120Publications", "merges": "\u0120Public ations", "count": 784, "type": "remove by frequency"} -{"id": 27147, "token": "\u0120Healthcare", "merges": "\u0120Health care", "count": 784, "type": "remove by frequency"} -{"id": 46303, "token": "\u0120lymphatic", "merges": "\u0120lymph atic", "count": 784, "type": "remove by frequency"} -{"id": 38799, "token": "\u0120Relief", "merges": "\u0120Rel ief", "count": 784, "type": "remove by frequency"} -{"id": 17729, "token": "\u0120Corps", "merges": "\u0120Cor ps", "count": 784, "type": "remove by frequency"} -{"id": 5095, "token": "\u0120\\,", "merges": "\u0120\\ ,", "count": 784, "type": "remove by frequency"} -{"id": 46437, "token": "\u0120Opportunity", "merges": "\u0120Opp ortunity", "count": 785, "type": "remove by frequency"} -{"id": 16200, "token": "\u0120metastasis", "merges": "\u0120metast asis", "count": 785, "type": "remove by frequency"} -{"id": 41755, "token": "\u0120migraine", "merges": "\u0120mig raine", "count": 785, "type": "remove by frequency"} -{"id": 36656, "token": "\u0120MVC", "merges": "\u0120M VC", "count": 785, "type": "remove by frequency"} -{"id": 43105, "token": "\u0120Wet", "merges": "\u0120W et", "count": 785, "type": "remove by frequency"} -{"id": 41855, "token": "\u00d7\u00a7", "merges": "\u00d7 \u00a7", "count": 785, "type": "remove by frequency"} -{"id": 40183, "token": "\u0120abscess", "merges": "\u0120abs cess", "count": 786, "type": "remove by frequency"} -{"id": 34517, "token": "\u0120durante", "merges": "\u0120dur ante", "count": 786, "type": "remove by frequency"} -{"id": 33718, "token": "\u0120Burton", "merges": "\u0120Bur ton", "count": 786, "type": "remove by frequency"} -{"id": 6729, "token": "\u0120Obama", "merges": "\u0120Ob ama", "count": 786, "type": "remove by frequency"} -{"id": 46530, "token": "\u0120Zhao", "merges": "\u0120Z hao", "count": 786, "type": "remove by frequency"} -{"id": 38410, "token": "\u0120ALS", "merges": "\u0120AL S", "count": 786, "type": "remove by frequency"} -{"id": 35942, "token": "\u0120Hundred", "merges": "\u0120H undred", "count": 787, "type": "remove by frequency"} -{"id": 29927, "token": "\u0120Owen", "merges": "\u0120O wen", "count": 787, "type": "remove by frequency"} -{"id": 48420, "token": "\u0120Waste", "merges": "\u0120W aste", "count": 788, "type": "remove by frequency"} -{"id": 17566, "token": "\u0120malignant", "merges": "\u0120malign ant", "count": 789, "type": "remove by frequency"} -{"id": 32185, "token": "\u0120Ich", "merges": "\u0120I ch", "count": 789, "type": "remove by frequency"} -{"id": 22092, "token": "\\}\\", "merges": "\\ }\\", "count": 789, "type": "remove by frequency"} -{"id": 33662, "token": "\u0120homomorphism", "merges": "\u0120hom omorphism", "count": 790, "type": "remove by frequency"} -{"id": 42832, "token": "\u0120lumines", "merges": "\u0120lum ines", "count": 790, "type": "remove by frequency"} -{"id": 44433, "token": "\u0120d\u00c3\u0143a", "merges": "\u0120d \u00c3\u0143a", "count": 790, "type": "remove by frequency"} -{"id": 17879, "token": "\u0120CIA", "merges": "\u0120C IA", "count": 790, "type": "remove by frequency"} -{"id": 46592, "token": "\u0120Adventures", "merges": "\u0120Advent ures", "count": 791, "type": "remove by frequency"} -{"id": 43779, "token": "\u0120Killing", "merges": "\u0120K illing", "count": 791, "type": "remove by frequency"} -{"id": 24496, "token": "\u0120Luther", "merges": "\u0120L uther", "count": 791, "type": "remove by frequency"} -{"id": 31890, "token": "\u0120Wals", "merges": "\u0120W als", "count": 791, "type": "remove by frequency"} -{"id": 40558, "token": "\u0120Riv", "merges": "\u0120R iv", "count": 791, "type": "remove by frequency"} -{"id": 35612, "token": "ocortic", "merges": "oc ortic", "count": 792, "type": "remove by frequency"} -{"id": 49154, "token": "odontic", "merges": "odont ic", "count": 792, "type": "remove by frequency"} -{"id": 36373, "token": "\u0120Franz", "merges": "\u0120Fr anz", "count": 792, "type": "remove by frequency"} -{"id": 25651, "token": "\u00ce\u00b6", "merges": "\u00ce \u00b6", "count": 792, "type": "remove by frequency"} -{"id": 29136, "token": "\u0120VIII", "merges": "\u0120V III", "count": 793, "type": "remove by frequency"} -{"id": 4398, "token": "\u0120\\<", "merges": "\u0120\\ <", "count": 793, "type": "remove by frequency"} -{"id": 54141, "token": "\u00e9\u0123\u00b6", "merges": "\u00e9\u0123 \u00b6", "count": 793, "type": "remove by frequency"} -{"id": 32709, "token": "\u0120endoscopic", "merges": "\u0120end oscopic", "count": 794, "type": "remove by frequency"} -{"id": 16069, "token": "\u0120neuronal", "merges": "\u0120neur onal", "count": 794, "type": "remove by frequency"} -{"id": 47255, "token": "\u0120Anto", "merges": "\u0120An to", "count": 794, "type": "remove by frequency"} -{"id": 35810, "token": "ocellular", "merges": "oc ellular", "count": 795, "type": "remove by frequency"} -{"id": 34962, "token": "\u0120duality", "merges": "\u0120dual ity", "count": 795, "type": "remove by frequency"} -{"id": 35542, "token": "\u0120Ish", "merges": "\u0120I sh", "count": 795, "type": "remove by frequency"} -{"id": 41808, "token": "\u00c3\u00a9p", "merges": "\u00c3\u00a9 p", "count": 795, "type": "remove by frequency"} -{"id": 47687, "token": "\u0120individualized", "merges": "\u0120individual ized", "count": 796, "type": "remove by frequency"} -{"id": 49504, "token": "Affirmed", "merges": "Aff irmed", "count": 796, "type": "remove by frequency"} -{"id": 40435, "token": "\u0120Noble", "merges": "\u0120Nob le", "count": 796, "type": "remove by frequency"} -{"id": 50163, "token": "\u0120trich", "merges": "\u0120tr ich", "count": 796, "type": "remove by frequency"} -{"id": 27708, "token": "\u0120Leonard", "merges": "\u0120Leon ard", "count": 797, "type": "remove by frequency"} -{"id": 46252, "token": "\u0120spores", "merges": "\u0120sp ores", "count": 797, "type": "remove by frequency"} -{"id": 47556, "token": "\u0120delir", "merges": "\u0120del ir", "count": 797, "type": "remove by frequency"} -{"id": 30354, "token": "\u0120Dod", "merges": "\u0120D od", "count": 797, "type": "remove by frequency"} -{"id": 11110, "token": "}_\\", "merges": "}_ \\", "count": 797, "type": "remove by frequency"} -{"id": 45221, "token": "\u0120Preparation", "merges": "\u0120Pre paration", "count": 798, "type": "remove by frequency"} -{"id": 16294, "token": "\u0120inhibitory", "merges": "\u0120inhib itory", "count": 798, "type": "remove by frequency"} -{"id": 47261, "token": "\u0120insoluble", "merges": "\u0120insol uble", "count": 798, "type": "remove by frequency"} -{"id": 25645, "token": "defendant", "merges": "def endant", "count": 798, "type": "remove by frequency"} -{"id": 41361, "token": "\u0120**[", "merges": "\u0120** [", "count": 798, "type": "remove by frequency"} -{"id": 29100, "token": "\u0120relativistic", "merges": "\u0120rel ativistic", "count": 799, "type": "remove by frequency"} -{"id": 32237, "token": "\u0120Independence", "merges": "\u0120Ind ependence", "count": 799, "type": "remove by frequency"} -{"id": 41116, "token": "\u0120seminal", "merges": "\u0120sem inal", "count": 799, "type": "remove by frequency"} -{"id": 39119, "token": "\u0120Schwartz", "merges": "\u0120Sch wartz", "count": 800, "type": "remove by frequency"} -{"id": 47803, "token": "\u0120LSD", "merges": "\u0120L SD", "count": 800, "type": "remove by frequency"} -{"id": 26957, "token": "\u0120Abu", "merges": "\u0120Ab u", "count": 800, "type": "remove by frequency"} -{"id": 26690, "token": "\u0120clinicians", "merges": "\u0120clin icians", "count": 801, "type": "remove by frequency"} -{"id": 30975, "token": "\u0120questo", "merges": "\u0120quest o", "count": 801, "type": "remove by frequency"} -{"id": 25062, "token": "\u0120Karen", "merges": "\u0120K aren", "count": 801, "type": "remove by frequency"} -{"id": 25030, "token": "\u0120****,", "merges": "\u0120 ****,", "count": 801, "type": "remove by frequency"} -{"id": 34042, "token": "aughs", "merges": "augh s", "count": 801, "type": "remove by frequency"} -{"id": 32766, "token": "\u0120Hann", "merges": "\u0120H ann", "count": 801, "type": "remove by frequency"} -{"id": 35637, "token": "\u0120symplectic", "merges": "\u0120sym plectic", "count": 802, "type": "remove by frequency"} -{"id": 50116, "token": "\u0120hetero", "merges": "\u0120heter o", "count": 802, "type": "remove by frequency"} -{"id": 40696, "token": "\u0120dann", "merges": "\u0120d ann", "count": 802, "type": "remove by frequency"} -{"id": 48609, "token": "\u0120evaporated", "merges": "\u0120evapor ated", "count": 803, "type": "remove by frequency"} -{"id": 49301, "token": "\u0120budding", "merges": "\u0120bud ding", "count": 803, "type": "remove by frequency"} -{"id": 45625, "token": "\u0120lepton", "merges": "\u0120le pton", "count": 803, "type": "remove by frequency"} -{"id": 37932, "token": "\u0120Raven", "merges": "\u0120R aven", "count": 803, "type": "remove by frequency"} -{"id": 38233, "token": "\u0120ERA", "merges": "\u0120E RA", "count": 803, "type": "remove by frequency"} -{"id": 42237, "token": "\u0120fibrillation", "merges": "\u0120fibr illation", "count": 804, "type": "remove by frequency"} -{"id": 17592, "token": "\u0120hippocamp", "merges": "\u0120hipp ocamp", "count": 804, "type": "remove by frequency"} -{"id": 27013, "token": "\u0120remanded", "merges": "\u0120rem anded", "count": 804, "type": "remove by frequency"} -{"id": 29906, "token": "\u0120Bros", "merges": "\u0120B ros", "count": 804, "type": "remove by frequency"} -{"id": 14858, "token": "\u00ce\u00b9\u00ce\u00b1", "merges": "\u00ce\u00b9 \u00ce\u00b1", "count": 804, "type": "remove by frequency"} -{"id": 43108, "token": "\u0120abnormality", "merges": "\u0120abnorm ality", "count": 805, "type": "remove by frequency"} -{"id": 24203, "token": "artifactId", "merges": "artifact Id", "count": 805, "type": "remove by frequency"} -{"id": 38122, "token": "\u0120antibacterial", "merges": "\u0120antib acterial", "count": 806, "type": "remove by frequency"} -{"id": 29443, "token": "\u0120Wend", "merges": "\u0120W end", "count": 806, "type": "remove by frequency"} -{"id": 45082, "token": "\u0120Levy", "merges": "\u0120Lev y", "count": 806, "type": "remove by frequency"} -{"id": 36771, "token": "\u00d5\u00a1\u00d5", "merges": "\u00d5\u00a1 \u00d5", "count": 806, "type": "remove by frequency"} -{"id": 29127, "token": "\u0120testosterone", "merges": "\u0120test osterone", "count": 807, "type": "remove by frequency"} -{"id": 36848, "token": "\u0120ventricle", "merges": "\u0120ventric le", "count": 807, "type": "remove by frequency"} -{"id": 36858, "token": "\u0120Calvin", "merges": "\u0120Cal vin", "count": 807, "type": "remove by frequency"} -{"id": 49774, "token": "\u0120hacia", "merges": "\u0120h acia", "count": 807, "type": "remove by frequency"} -{"id": 25555, "token": "\u0120accretion", "merges": "\u0120accret ion", "count": 808, "type": "remove by frequency"} -{"id": 44741, "token": "\u0120thriller", "merges": "\u0120thr iller", "count": 808, "type": "remove by frequency"} -{"id": 38903, "token": "\u0120alors", "merges": "\u0120al ors", "count": 808, "type": "remove by frequency"} -{"id": 33219, "token": "\u0120KNOW", "merges": "\u0120K NOW", "count": 808, "type": "remove by frequency"} -{"id": 38312, "token": "\u0120Survival", "merges": "\u0120Surv ival", "count": 809, "type": "remove by frequency"} -{"id": 39751, "token": "\u0120fibrous", "merges": "\u0120fibr ous", "count": 809, "type": "remove by frequency"} -{"id": 45968, "token": "\u0120Rag", "merges": "\u0120R ag", "count": 809, "type": "remove by frequency"} -{"id": 46502, "token": "\u0120PDT", "merges": "\u0120PD T", "count": 809, "type": "remove by frequency"} -{"id": 27273, "token": "\u0120electrophoresis", "merges": "\u0120electroph oresis", "count": 810, "type": "remove by frequency"} -{"id": 31270, "token": "\u0120Kenneth", "merges": "\u0120Kenn eth", "count": 810, "type": "remove by frequency"} -{"id": 46345, "token": "antibodies", "merges": "antib odies", "count": 811, "type": "remove by frequency"} -{"id": 41932, "token": "\u0120Uh", "merges": "\u0120U h", "count": 811, "type": "remove by frequency"} -{"id": 48374, "token": "\u0120illuminating", "merges": "\u0120illumin ating", "count": 812, "type": "remove by frequency"} -{"id": 38277, "token": "\u0120neutrinos", "merges": "\u0120neut rinos", "count": 812, "type": "remove by frequency"} -{"id": 42761, "token": "\u00c3\u00b4le", "merges": "\u00c3\u00b4 le", "count": 812, "type": "remove by frequency"} -{"id": 31651, "token": "\u0120Ach", "merges": "\u0120A ch", "count": 812, "type": "remove by frequency"} -{"id": 35661, "token": "\u0120Eighth", "merges": "\u0120Eight h", "count": 813, "type": "remove by frequency"} -{"id": 46755, "token": "\u0120Influ", "merges": "\u0120In flu", "count": 813, "type": "remove by frequency"} -{"id": 41147, "token": "\u00d9\u012a\u00d9\u0126", "merges": "\u00d9\u012a \u00d9\u0126", "count": 813, "type": "remove by frequency"} -{"id": 41731, "token": "\u0120outper", "merges": "\u0120out per", "count": 814, "type": "remove by frequency"} -{"id": 28856, "token": "\u0120Laws", "merges": "\u0120Law s", "count": 814, "type": "remove by frequency"} -{"id": 41353, "token": "Bibliography", "merges": "Bib liography", "count": 815, "type": "remove by frequency"} -{"id": 42415, "token": "\u0120insulator", "merges": "\u0120ins ulator", "count": 815, "type": "remove by frequency"} -{"id": 48827, "token": "\u0120Costs", "merges": "\u0120Cost s", "count": 815, "type": "remove by frequency"} -{"id": 47674, "token": "\u0120Mayer", "merges": "\u0120May er", "count": 815, "type": "remove by frequency"} -{"id": 26511, "token": "\u0120Hern", "merges": "\u0120H ern", "count": 815, "type": "remove by frequency"} -{"id": 44986, "token": "\u0120MSM", "merges": "\u0120M SM", "count": 815, "type": "remove by frequency"} -{"id": 46158, "token": "\u0120travail", "merges": "\u0120trav ail", "count": 816, "type": "remove by frequency"} -{"id": 45899, "token": "\u0120macrom", "merges": "\u0120mac rom", "count": 816, "type": "remove by frequency"} -{"id": 34197, "token": "\u0120Apollo", "merges": "\u0120Ap ollo", "count": 816, "type": "remove by frequency"} -{"id": 37386, "token": "\u0120probl", "merges": "\u0120pro bl", "count": 816, "type": "remove by frequency"} -{"id": 43475, "token": "\u0120Tomorrow", "merges": "\u0120Tom orrow", "count": 817, "type": "remove by frequency"} -{"id": 27991, "token": "\u0120Bradley", "merges": "\u0120Brad ley", "count": 817, "type": "remove by frequency"} -{"id": 41990, "token": "\u0120priming", "merges": "\u0120prim ing", "count": 817, "type": "remove by frequency"} -{"id": 16955, "token": "\u0120Charlie", "merges": "\u0120Char lie", "count": 817, "type": "remove by frequency"} -{"id": 42307, "token": "\u0120Scout", "merges": "\u0120Sc out", "count": 817, "type": "remove by frequency"} -{"id": 41964, "token": "\u0120Sham", "merges": "\u0120Sh am", "count": 817, "type": "remove by frequency"} -{"id": 48097, "token": "\u0120Stro", "merges": "\u0120St ro", "count": 817, "type": "remove by frequency"} -{"id": 45236, "token": "\u0120HAD", "merges": "\u0120H AD", "count": 817, "type": "remove by frequency"} -{"id": 18178, "token": "\u0120nanoparticles", "merges": "\u0120nanop articles", "count": 818, "type": "remove by frequency"} -{"id": 43152, "token": "\u0120creepy", "merges": "\u0120creep y", "count": 818, "type": "remove by frequency"} -{"id": 10647, "token": "\u0120f\u00c3\u00b6r", "merges": "\u0120f \u00c3\u00b6r", "count": 818, "type": "remove by frequency"} -{"id": 46166, "token": "\u0120Vale", "merges": "\u0120V ale", "count": 818, "type": "remove by frequency"} -{"id": 30956, "token": "\u0120Cay", "merges": "\u0120C ay", "count": 818, "type": "remove by frequency"} -{"id": 47410, "token": "\u0120rearrangement", "merges": "\u0120rearrang ement", "count": 819, "type": "remove by frequency"} -{"id": 26144, "token": "\u0120dopamine", "merges": "\u0120dop amine", "count": 819, "type": "remove by frequency"} -{"id": 45420, "token": "\u0120caramel", "merges": "\u0120car amel", "count": 819, "type": "remove by frequency"} -{"id": 27974, "token": "\u0120atrial", "merges": "\u0120at rial", "count": 819, "type": "remove by frequency"} -{"id": 8033, "token": "\u0120Islam", "merges": "\u0120Is lam", "count": 819, "type": "remove by frequency"} -{"id": 13124, "token": "\u0120Europ", "merges": "\u0120E urop", "count": 819, "type": "remove by frequency"} -{"id": 19556, "token": "\u0120pathogenesis", "merges": "\u0120path ogenesis", "count": 820, "type": "remove by frequency"} -{"id": 48327, "token": "\u0120Enterprises", "merges": "\u0120Enter prises", "count": 820, "type": "remove by frequency"} -{"id": 33613, "token": "\u0120uterine", "merges": "\u0120uter ine", "count": 820, "type": "remove by frequency"} -{"id": 26134, "token": "\u0120Around", "merges": "\u0120Ar ound", "count": 820, "type": "remove by frequency"} -{"id": 36468, "token": "\u0120Carp", "merges": "\u0120Car p", "count": 820, "type": "remove by frequency"} -{"id": 38050, "token": "\u0120d\u00c3\u00a9s", "merges": "\u0120d \u00c3\u00a9s", "count": 821, "type": "remove by frequency"} -{"id": 43016, "token": "\u0120Hof", "merges": "\u0120H of", "count": 821, "type": "remove by frequency"} -{"id": 43861, "token": "\u0120infused", "merges": "\u0120inf used", "count": 822, "type": "remove by frequency"} -{"id": 49369, "token": "\u0120Mystery", "merges": "\u0120My stery", "count": 822, "type": "remove by frequency"} -{"id": 40524, "token": "\u0120Blu", "merges": "\u0120Bl u", "count": 822, "type": "remove by frequency"} -{"id": 12277, "token": "\u0120assays", "merges": "\u0120ass ays", "count": 823, "type": "remove by frequency"} -{"id": 48503, "token": "\u00e3\u0122\u0122\u00e3\u0122\u0122", "merges": "\u00e3\u0122\u0122 \u00e3\u0122\u0122", "count": 823, "type": "remove by frequency"} -{"id": 28257, "token": "\u0120Gand", "merges": "\u0120G and", "count": 824, "type": "remove by frequency"} -{"id": 35929, "token": "\u0120Crus", "merges": "\u0120Cr us", "count": 824, "type": "remove by frequency"} -{"id": 22303, "token": "\u0120NBC", "merges": "\u0120N BC", "count": 824, "type": "remove by frequency"} -{"id": 48068, "token": "\u0120FISH", "merges": "\u0120F ISH", "count": 825, "type": "remove by frequency"} -{"id": 36401, "token": "\u0120monomers", "merges": "\u0120mon omers", "count": 826, "type": "remove by frequency"} -{"id": 33085, "token": "\u0120vacate", "merges": "\u0120vac ate", "count": 826, "type": "remove by frequency"} -{"id": 23602, "token": "\u0120Hudson", "merges": "\u0120H udson", "count": 826, "type": "remove by frequency"} -{"id": 11268, "token": "\u0120Spain", "merges": "\u0120Sp ain", "count": 826, "type": "remove by frequency"} -{"id": 36883, "token": "\u0120Hugo", "merges": "\u0120Hug o", "count": 826, "type": "remove by frequency"} -{"id": 38553, "token": "\u0120Fry", "merges": "\u0120F ry", "count": 826, "type": "remove by frequency"} -{"id": 19049, "token": "\u0120,\\", "merges": "\u0120, \\", "count": 826, "type": "remove by frequency"} -{"id": 38838, "token": "omerase", "merges": "omer ase", "count": 827, "type": "remove by frequency"} -{"id": 12885, "token": "\u0120Holly", "merges": "\u0120Hol ly", "count": 827, "type": "remove by frequency"} -{"id": 49506, "token": "\u0120Essay", "merges": "\u0120Ess ay", "count": 827, "type": "remove by frequency"} -{"id": 49839, "token": "\u0120Tale", "merges": "\u0120T ale", "count": 828, "type": "remove by frequency"} -{"id": 13946, "token": "\u0120NFL", "merges": "\u0120N FL", "count": 828, "type": "remove by frequency"} -{"id": 17902, "token": "\u0120cortical", "merges": "\u0120cort ical", "count": 829, "type": "remove by frequency"} -{"id": 50105, "token": "\u0120Ket", "merges": "\u0120K et", "count": 829, "type": "remove by frequency"} -{"id": 34029, "token": "\u0120thrombosis", "merges": "\u0120thromb osis", "count": 830, "type": "remove by frequency"} -{"id": 32234, "token": "Originally", "merges": "Or iginally", "count": 830, "type": "remove by frequency"} -{"id": 44207, "token": "\u0120Thermo", "merges": "\u0120Ther mo", "count": 830, "type": "remove by frequency"} -{"id": 34606, "token": "\u00d9\u012a\u00d9\u0128", "merges": "\u00d9\u012a \u00d9\u0128", "count": 830, "type": "remove by frequency"} -{"id": 25873, "token": "\u00c3\u00b6d", "merges": "\u00c3\u00b6 d", "count": 830, "type": "remove by frequency"} -{"id": 30837, "token": "transferase", "merges": "transfer ase", "count": 831, "type": "remove by frequency"} -{"id": 40720, "token": "\u0120guitarist", "merges": "\u0120guitar ist", "count": 831, "type": "remove by frequency"} -{"id": 44198, "token": "\u0120Randolph", "merges": "\u0120Rand olph", "count": 831, "type": "remove by frequency"} -{"id": 39516, "token": "\u0120depolar", "merges": "\u0120dep olar", "count": 831, "type": "remove by frequency"} -{"id": 9060, "token": "\u0120Chief", "merges": "\u0120Ch ief", "count": 831, "type": "remove by frequency"} -{"id": 50026, "token": "\u0120Went", "merges": "\u0120W ent", "count": 831, "type": "remove by frequency"} -{"id": 38025, "token": "\u0120Hipp", "merges": "\u0120H ipp", "count": 831, "type": "remove by frequency"} -{"id": 8330, "token": "\u0120Wednesday", "merges": "\u0120Wed nesday", "count": 832, "type": "remove by frequency"} -{"id": 39704, "token": "\u0120ureth", "merges": "\u0120ure th", "count": 832, "type": "remove by frequency"} -{"id": 22212, "token": "\u0120Emperor", "merges": "\u0120Em peror", "count": 833, "type": "remove by frequency"} -{"id": 38826, "token": "\u0120Phosph", "merges": "\u0120Ph osph", "count": 833, "type": "remove by frequency"} -{"id": 38894, "token": "\u0120UIT", "merges": "\u0120U IT", "count": 833, "type": "remove by frequency"} -{"id": 8313, "token": "\u00c2\u00bd", "merges": "\u00c2 \u00bd", "count": 833, "type": "remove by frequency"} -{"id": 28079, "token": "\u0120Agriculture", "merges": "\u0120Agric ulture", "count": 834, "type": "remove by frequency"} -{"id": 47673, "token": "okinetics", "merges": "ok inetics", "count": 834, "type": "remove by frequency"} -{"id": 41809, "token": "\u0120placenta", "merges": "\u0120placent a", "count": 834, "type": "remove by frequency"} -{"id": 34162, "token": "\u0120atypical", "merges": "\u0120atyp ical", "count": 834, "type": "remove by frequency"} -{"id": 48702, "token": "\u0120Tickets", "merges": "\u0120T ickets", "count": 834, "type": "remove by frequency"} -{"id": 31999, "token": "\u0120Mats", "merges": "\u0120M ats", "count": 834, "type": "remove by frequency"} -{"id": 44141, "token": "\u0120}).", "merges": "\u0120} ).", "count": 834, "type": "remove by frequency"} -{"id": 27526, "token": "\u0120Prevention", "merges": "\u0120Pre vention", "count": 835, "type": "remove by frequency"} -{"id": 48112, "token": "\u0120retardation", "merges": "\u0120retard ation", "count": 836, "type": "remove by frequency"} -{"id": 38008, "token": "\u0120Orchestra", "merges": "\u0120Orche stra", "count": 836, "type": "remove by frequency"} -{"id": 34877, "token": "\u0120Kerr", "merges": "\u0120K err", "count": 836, "type": "remove by frequency"} -{"id": 18759, "token": "\u0120Nig", "merges": "\u0120N ig", "count": 836, "type": "remove by frequency"} -{"id": 39847, "token": "osyl", "merges": "os yl", "count": 836, "type": "remove by frequency"} -{"id": 11444, "token": "}^{-", "merges": "}^{ -", "count": 836, "type": "remove by frequency"} -{"id": 48751, "token": "\u0120cochle", "merges": "\u0120co chle", "count": 837, "type": "remove by frequency"} -{"id": 41720, "token": "\u0120Nokia", "merges": "\u0120N okia", "count": 837, "type": "remove by frequency"} -{"id": 30982, "token": "\u0120Bax", "merges": "\u0120B ax", "count": 837, "type": "remove by frequency"} -{"id": 45941, "token": "\u0120Bis", "merges": "\u0120B is", "count": 837, "type": "remove by frequency"} -{"id": 47983, "token": "\u0120metaphysical", "merges": "\u0120metaphys ical", "count": 838, "type": "remove by frequency"} -{"id": 47450, "token": "\u0120coworkers", "merges": "\u0120cowork ers", "count": 838, "type": "remove by frequency"} -{"id": 21467, "token": "\u0120necrosis", "merges": "\u0120nec rosis", "count": 838, "type": "remove by frequency"} -{"id": 29301, "token": "\u0120Animals", "merges": "\u0120An imals", "count": 838, "type": "remove by frequency"} -{"id": 38370, "token": "\u0120Bruno", "merges": "\u0120Brun o", "count": 838, "type": "remove by frequency"} -{"id": 35735, "token": "\u0120Laf", "merges": "\u0120L af", "count": 838, "type": "remove by frequency"} -{"id": 45479, "token": "\u0120PLL", "merges": "\u0120P LL", "count": 838, "type": "remove by frequency"} -{"id": 11227, "token": ",$$", "merges": ", $$", "count": 838, "type": "remove by frequency"} -{"id": 50329, "token": "\u00e2\u012b\u00a6", "merges": "\u00e2\u012b \u00a6", "count": 838, "type": "remove by frequency"} -{"id": 43967, "token": "\u0120homogeneity", "merges": "\u0120hom ogeneity", "count": 839, "type": "remove by frequency"} -{"id": 25084, "token": "\u0120Moses", "merges": "\u0120M oses", "count": 839, "type": "remove by frequency"} -{"id": 42833, "token": "irubin", "merges": "ir ubin", "count": 839, "type": "remove by frequency"} -{"id": 40285, "token": "\u0120Ost", "merges": "\u0120O st", "count": 839, "type": "remove by frequency"} -{"id": 39291, "token": "\u00ef\u00bd\u00a1", "merges": "\u00ef\u00bd \u00a1", "count": 839, "type": "remove by frequency"} -{"id": 19964, "token": "\u00c2\u00aa", "merges": "\u00c2 \u00aa", "count": 839, "type": "remove by frequency"} -{"id": 26267, "token": "\u0120ArrayList", "merges": "\u0120Array List", "count": 840, "type": "remove by frequency"} -{"id": 37454, "token": "\u0120Fischer", "merges": "\u0120F ischer", "count": 840, "type": "remove by frequency"} -{"id": 28833, "token": "\u0120somatic", "merges": "\u0120som atic", "count": 840, "type": "remove by frequency"} -{"id": 31603, "token": "\u0120Crist", "merges": "\u0120C rist", "count": 840, "type": "remove by frequency"} -{"id": 17538, "token": "\u0120Fifth", "merges": "\u0120Fif th", "count": 840, "type": "remove by frequency"} -{"id": 43299, "token": "\u0120Fem", "merges": "\u0120F em", "count": 840, "type": "remove by frequency"} -{"id": 35429, "token": "\u00d8\u00b6", "merges": "\u00d8 \u00b6", "count": 840, "type": "remove by frequency"} -{"id": 29940, "token": "\u0120stenosis", "merges": "\u0120sten osis", "count": 841, "type": "remove by frequency"} -{"id": 5664, "token": "boldsymbol", "merges": "bolds ymbol", "count": 842, "type": "remove by frequency"} -{"id": 36091, "token": "\u0120Mercury", "merges": "\u0120Merc ury", "count": 842, "type": "remove by frequency"} -{"id": 44071, "token": "\u0120Victory", "merges": "\u0120Vict ory", "count": 842, "type": "remove by frequency"} -{"id": 15696, "token": "\u0120Nether", "merges": "\u0120Net her", "count": 842, "type": "remove by frequency"} -{"id": 40552, "token": "\u00c3\u00a9qu", "merges": "\u00c3\u00a9 qu", "count": 842, "type": "remove by frequency"} -{"id": 45835, "token": "\u0120-------------------------------", "merges": "\u0120---------------- ---------------", "count": 843, "type": "remove by frequency"} -{"id": 28773, "token": "\u0120femoral", "merges": "\u0120fem oral", "count": 843, "type": "remove by frequency"} -{"id": 29169, "token": "\u0120Pearl", "merges": "\u0120Pear l", "count": 843, "type": "remove by frequency"} -{"id": 46568, "token": "\u0120Fres", "merges": "\u0120F res", "count": 843, "type": "remove by frequency"} -{"id": 20669, "token": ")}_{", "merges": ") }_{", "count": 843, "type": "remove by frequency"} -{"id": 45301, "token": "]\\\\", "merges": "] \\\\", "count": 843, "type": "remove by frequency"} -{"id": 43375, "token": "\u0120Rough", "merges": "\u0120R ough", "count": 844, "type": "remove by frequency"} -{"id": 42538, "token": "\u0120Mond", "merges": "\u0120M ond", "count": 844, "type": "remove by frequency"} -{"id": 36489, "token": "\u0120voir", "merges": "\u0120vo ir", "count": 844, "type": "remove by frequency"} -{"id": 48582, "token": "\u0120Rim", "merges": "\u0120R im", "count": 844, "type": "remove by frequency"} -{"id": 37602, "token": "\u0120Erd", "merges": "\u0120E rd", "count": 844, "type": "remove by frequency"} -{"id": 11549, "token": "\u0120\\;", "merges": "\u0120\\ ;", "count": 844, "type": "remove by frequency"} -{"id": 30442, "token": "\u0120Clinic", "merges": "\u0120Cl inic", "count": 845, "type": "remove by frequency"} -{"id": 28750, "token": "\u0120Mills", "merges": "\u0120M ills", "count": 845, "type": "remove by frequency"} -{"id": 25498, "token": "\u0120Gay", "merges": "\u0120G ay", "count": 845, "type": "remove by frequency"} -{"id": 43734, "token": "\u0120phosphor", "merges": "\u0120phosph or", "count": 846, "type": "remove by frequency"} -{"id": 41963, "token": "\u0120Thomson", "merges": "\u0120Thom son", "count": 846, "type": "remove by frequency"} -{"id": 48621, "token": "\u0120atheist", "merges": "\u0120athe ist", "count": 846, "type": "remove by frequency"} -{"id": 30674, "token": "\u0120Reich", "merges": "\u0120Re ich", "count": 846, "type": "remove by frequency"} -{"id": 35300, "token": "\u00e3\u0123\u00ab\u00e3\u0123\u00af", "merges": "\u00e3\u0123\u00ab \u00e3\u0123\u00af", "count": 846, "type": "remove by frequency"} -{"id": 21217, "token": "\u0120Verm", "merges": "\u0120V erm", "count": 846, "type": "remove by frequency"} -{"id": 32632, "token": "\u0120Wool", "merges": "\u0120W ool", "count": 846, "type": "remove by frequency"} -{"id": 25198, "token": "\u0120Jag", "merges": "\u0120J ag", "count": 846, "type": "remove by frequency"} -{"id": 47236, "token": "formaldehyde", "merges": "formal dehyde", "count": 847, "type": "remove by frequency"} -{"id": 18985, "token": "\u0120Jonathan", "merges": "\u0120Jon athan", "count": 847, "type": "remove by frequency"} -{"id": 41925, "token": "\u0120Subsequent", "merges": "\u0120Sub sequent", "count": 848, "type": "remove by frequency"} -{"id": 33017, "token": "\u0120Cathedral", "merges": "\u0120Cat hedral", "count": 848, "type": "remove by frequency"} -{"id": 37549, "token": "\u0120Genes", "merges": "\u0120Gen es", "count": 848, "type": "remove by frequency"} -{"id": 32508, "token": "\u0120dissection", "merges": "\u0120dis section", "count": 849, "type": "remove by frequency"} -{"id": 47041, "token": "\u0120infinites", "merges": "\u0120infin ites", "count": 849, "type": "remove by frequency"} -{"id": 45712, "token": "itars", "merges": "it ars", "count": 849, "type": "remove by frequency"} -{"id": 28962, "token": "\u0120Raf", "merges": "\u0120R af", "count": 849, "type": "remove by frequency"} -{"id": 38707, "token": "\u0120Gothic", "merges": "\u0120Goth ic", "count": 850, "type": "remove by frequency"} -{"id": 52951, "token": "\u00e7\u0139\u00ba", "merges": "\u00e7\u0139 \u00ba", "count": 850, "type": "remove by frequency"} -{"id": 43245, "token": "\u0120computationally", "merges": "\u0120computation ally", "count": 851, "type": "remove by frequency"} -{"id": 34728, "token": "\u0120esophageal", "merges": "\u0120es ophageal", "count": 851, "type": "remove by frequency"} -{"id": 48066, "token": "\u0120counterclaim", "merges": "\u0120counter claim", "count": 852, "type": "remove by frequency"} -{"id": 23878, "token": "\u0120colorectal", "merges": "\u0120col orectal", "count": 852, "type": "remove by frequency"} -{"id": 46384, "token": "\u0120Savings", "merges": "\u0120Sav ings", "count": 852, "type": "remove by frequency"} -{"id": 45571, "token": "\u0120equilib", "merges": "\u0120equ ilib", "count": 852, "type": "remove by frequency"} -{"id": 44442, "token": "\u0120Tian", "merges": "\u0120T ian", "count": 852, "type": "remove by frequency"} -{"id": 10350, "token": "\u0120Constitution", "merges": "\u0120Con stitution", "count": 853, "type": "remove by frequency"} -{"id": 23001, "token": "\u0120Springer", "merges": "\u0120Spring er", "count": 853, "type": "remove by frequency"} -{"id": 44278, "token": "\u0120Organic", "merges": "\u0120Organ ic", "count": 853, "type": "remove by frequency"} -{"id": 33843, "token": "\u0120Canyon", "merges": "\u0120C anyon", "count": 853, "type": "remove by frequency"} -{"id": 30097, "token": "\u0120ethyl", "merges": "\u0120 ethyl", "count": 853, "type": "remove by frequency"} -{"id": 47730, "token": "autres", "merges": "aut res", "count": 853, "type": "remove by frequency"} -{"id": 21160, "token": "\u0120Bios", "merges": "\u0120B ios", "count": 853, "type": "remove by frequency"} -{"id": 16411, "token": "\u00cf\u0126\u00ce\u00b5", "merges": "\u00cf\u0126 \u00ce\u00b5", "count": 853, "type": "remove by frequency"} -{"id": 48274, "token": "\u0120Boltzmann", "merges": "\u0120B oltzmann", "count": 854, "type": "remove by frequency"} -{"id": 31658, "token": "\u0120estoppel", "merges": "\u0120est oppel", "count": 854, "type": "remove by frequency"} -{"id": 39946, "token": "\u0120viscous", "merges": "\u0120vis cous", "count": 854, "type": "remove by frequency"} -{"id": 39634, "token": "\u0120Stokes", "merges": "\u0120St okes", "count": 854, "type": "remove by frequency"} -{"id": 14180, "token": "\u0120Gree", "merges": "\u0120G ree", "count": 854, "type": "remove by frequency"} -{"id": 38776, "token": "\u0120Rho", "merges": "\u0120R ho", "count": 854, "type": "remove by frequency"} -{"id": 42360, "token": "r\u00c3\u0143", "merges": "r \u00c3\u0143", "count": 854, "type": "remove by frequency"} -{"id": 28854, "token": "\u0120Palmer", "merges": "\u0120Pal mer", "count": 855, "type": "remove by frequency"} -{"id": 34533, "token": "\u0120Hank", "merges": "\u0120H ank", "count": 855, "type": "remove by frequency"} -{"id": 47910, "token": "epithelial", "merges": "epit helial", "count": 856, "type": "remove by frequency"} -{"id": 26768, "token": "\u0120neurop", "merges": "\u0120ne urop", "count": 856, "type": "remove by frequency"} -{"id": 39058, "token": "fiddle", "merges": "f iddle", "count": 856, "type": "remove by frequency"} -{"id": 30193, "token": "\u0120Palm", "merges": "\u0120Pal m", "count": 856, "type": "remove by frequency"} -{"id": 47269, "token": "}}<", "merges": "; \"><", "count": 941, "type": "remove by frequency"} -{"id": 42197, "token": "\u0120sedentary", "merges": "\u0120sed entary", "count": 942, "type": "remove by frequency"} -{"id": 8216, "token": "\u0120Thursday", "merges": "\u0120Th ursday", "count": 943, "type": "remove by frequency"} -{"id": 28447, "token": "\u0120opioid", "merges": "\u0120opio id", "count": 943, "type": "remove by frequency"} -{"id": 22556, "token": "\u0120Tib", "merges": "\u0120T ib", "count": 943, "type": "remove by frequency"} -{"id": 41922, "token": "\u0120mediates", "merges": "\u0120medi ates", "count": 944, "type": "remove by frequency"} -{"id": 24008, "token": "\u0120Hughes", "merges": "\u0120Hug hes", "count": 944, "type": "remove by frequency"} -{"id": 31627, "token": "\u0120n\u00c3\u00a9", "merges": "\u0120n \u00c3\u00a9", "count": 944, "type": "remove by frequency"} -{"id": 11881, "token": "*~*", "merges": "*~ *", "count": 944, "type": "remove by frequency"} -{"id": 45320, "token": "\u0120bipartisan", "merges": "\u0120bipart isan", "count": 945, "type": "remove by frequency"} -{"id": 35499, "token": "\u0120Employment", "merges": "\u0120Employ ment", "count": 945, "type": "remove by frequency"} -{"id": 27233, "token": "\u0120o\u00c3\u00b9", "merges": "\u0120o \u00c3\u00b9", "count": 945, "type": "remove by frequency"} -{"id": 44300, "token": "rugu", "merges": "rug u", "count": 945, "type": "remove by frequency"} -{"id": 23630, "token": "\u0120\u00d9\u0126", "merges": "\u0120 \u00d9\u0126", "count": 945, "type": "remove by frequency"} -{"id": 38487, "token": "IOException", "merges": "IO Exception", "count": 946, "type": "remove by frequency"} -{"id": 23728, "token": "\u0120Archives", "merges": "\u0120Arch ives", "count": 946, "type": "remove by frequency"} -{"id": 44610, "token": "\u0120Mai", "merges": "\u0120M ai", "count": 946, "type": "remove by frequency"} -{"id": 22155, "token": "\u0120superconduct", "merges": "\u0120super conduct", "count": 947, "type": "remove by frequency"} -{"id": 48790, "token": "\u0120Chaos", "merges": "\u0120Cha os", "count": 947, "type": "remove by frequency"} -{"id": 38117, "token": "\u0120wordt", "merges": "\u0120word t", "count": 947, "type": "remove by frequency"} -{"id": 26226, "token": "\u0120Wes", "merges": "\u0120W es", "count": 947, "type": "remove by frequency"} -{"id": 36953, "token": "\u0120t\u00c3\u00a9", "merges": "\u0120t \u00c3\u00a9", "count": 947, "type": "remove by frequency"} -{"id": 13602, "token": "\u0120Crim", "merges": "\u0120C rim", "count": 948, "type": "remove by frequency"} -{"id": 15478, "token": "\u00c8\u013d", "merges": "\u00c8 \u013d", "count": 948, "type": "remove by frequency"} -{"id": 46743, "token": "\u0120superimposed", "merges": "\u0120super imposed", "count": 949, "type": "remove by frequency"} -{"id": 35873, "token": "\u0120radiative", "merges": "\u0120radi ative", "count": 949, "type": "remove by frequency"} -{"id": 17106, "token": "\u0120Neither", "merges": "\u0120Ne ither", "count": 949, "type": "remove by frequency"} -{"id": 39875, "token": "\u0120euthan", "merges": "\u0120e uthan", "count": 949, "type": "remove by frequency"} -{"id": 23216, "token": "n\u00c3\u0143", "merges": "n \u00c3\u0143", "count": 949, "type": "remove by frequency"} -{"id": 39342, "token": "\u0120inversely", "merges": "\u0120in versely", "count": 950, "type": "remove by frequency"} -{"id": 10392, "token": ")}$", "merges": ") }$", "count": 950, "type": "remove by frequency"} -{"id": 40184, "token": "\u0120indistinguishable", "merges": "\u0120indist inguishable", "count": 951, "type": "remove by frequency"} -{"id": 41818, "token": "\u0120notations", "merges": "\u0120not ations", "count": 951, "type": "remove by frequency"} -{"id": 31286, "token": "\u0120WHE", "merges": "\u0120W HE", "count": 951, "type": "remove by frequency"} -{"id": 32966, "token": "($_", "merges": "($ _", "count": 951, "type": "remove by frequency"} -{"id": 29029, "token": "\u0120selectivity", "merges": "\u0120select ivity", "count": 952, "type": "remove by frequency"} -{"id": 31961, "token": "=\"${", "merges": "=\" ${", "count": 952, "type": "remove by frequency"} -{"id": 44424, "token": "\u0120bek", "merges": "\u0120be k", "count": 952, "type": "remove by frequency"} -{"id": 26427, "token": "\u0120dehydrogen", "merges": "\u0120de hydrogen", "count": 953, "type": "remove by frequency"} -{"id": 26663, "token": "\u0120homolog", "merges": "\u0120hom olog", "count": 953, "type": "remove by frequency"} -{"id": 49759, "token": "\u0120wafers", "merges": "\u0120w afers", "count": 953, "type": "remove by frequency"} -{"id": 43267, "token": "\u0120Chest", "merges": "\u0120C hest", "count": 953, "type": "remove by frequency"} -{"id": 47562, "token": ")$\\", "merges": ")$ \\", "count": 953, "type": "remove by frequency"} -{"id": 24031, "token": "\u0120ischemia", "merges": "\u0120is chemia", "count": 954, "type": "remove by frequency"} -{"id": 50194, "token": "\u0120ejemplo", "merges": "\u0120ej emplo", "count": 954, "type": "remove by frequency"} -{"id": 22218, "token": "\u0120Freder", "merges": "\u0120Fred er", "count": 954, "type": "remove by frequency"} -{"id": 21551, "token": "arlier", "merges": "ar lier", "count": 954, "type": "remove by frequency"} -{"id": 40118, "token": "\u0120lute", "merges": "\u0120l ute", "count": 954, "type": "remove by frequency"} -{"id": 48307, "token": "~).", "merges": "~ ).", "count": 954, "type": "remove by frequency"} -{"id": 26166, "token": "\u0120cosmological", "merges": "\u0120cosm ological", "count": 955, "type": "remove by frequency"} -{"id": 34184, "token": "\u0120Waters", "merges": "\u0120Wat ers", "count": 955, "type": "remove by frequency"} -{"id": 20554, "token": "\u0120Oliver", "merges": "\u0120O liver", "count": 955, "type": "remove by frequency"} -{"id": 9972, "token": "\u0120Italy", "merges": "\u0120It aly", "count": 955, "type": "remove by frequency"} -{"id": 41558, "token": "\u0120Zhou", "merges": "\u0120Z hou", "count": 955, "type": "remove by frequency"} -{"id": 33135, "token": "\u0120Koch", "merges": "\u0120K och", "count": 955, "type": "remove by frequency"} -{"id": 44149, "token": "\u0120MMA", "merges": "\u0120M MA", "count": 955, "type": "remove by frequency"} -{"id": 43558, "token": "^*,", "merges": "^ *,", "count": 955, "type": "remove by frequency"} -{"id": 45232, "token": "\u0120\u00d8\u00ae", "merges": "\u0120\u00d8 \u00ae", "count": 955, "type": "remove by frequency"} -{"id": 16783, "token": "\u0120methylation", "merges": "\u0120methyl ation", "count": 956, "type": "remove by frequency"} -{"id": 35051, "token": "\u0120solubility", "merges": "\u0120solub ility", "count": 956, "type": "remove by frequency"} -{"id": 46587, "token": "Eventually", "merges": "Event ually", "count": 956, "type": "remove by frequency"} -{"id": 24425, "token": "\u0120Bapt", "merges": "\u0120B apt", "count": 956, "type": "remove by frequency"} -{"id": 23312, "token": "\u0120HPV", "merges": "\u0120H PV", "count": 956, "type": "remove by frequency"} -{"id": 39715, "token": "phosphory", "merges": "phosph ory", "count": 957, "type": "remove by frequency"} -{"id": 39357, "token": "Biography", "merges": "Bi ography", "count": 957, "type": "remove by frequency"} -{"id": 33294, "token": "\u0120Henri", "merges": "\u0120Hen ri", "count": 957, "type": "remove by frequency"} -{"id": 46155, "token": "\u0120Tales", "merges": "\u0120T ales", "count": 957, "type": "remove by frequency"} -{"id": 30012, "token": "\u0120Kas", "merges": "\u0120K as", "count": 957, "type": "remove by frequency"} -{"id": 42332, "token": "\u0120-----------------------------", "merges": "\u0120---------------- -------------", "count": 958, "type": "remove by frequency"} -{"id": 38587, "token": "\u0120nanot", "merges": "\u0120nan ot", "count": 958, "type": "remove by frequency"} -{"id": 35359, "token": "\u0120Swan", "merges": "\u0120Sw an", "count": 958, "type": "remove by frequency"} -{"id": 36482, "token": "\u0120Lil", "merges": "\u0120L il", "count": 958, "type": "remove by frequency"} -{"id": 40083, "token": "\u0120inpatient", "merges": "\u0120in patient", "count": 959, "type": "remove by frequency"} -{"id": 23463, "token": "\u0120neutrino", "merges": "\u0120neutr ino", "count": 959, "type": "remove by frequency"} -{"id": 35845, "token": "\u0120excision", "merges": "\u0120exc ision", "count": 959, "type": "remove by frequency"} -{"id": 39872, "token": "\u0120glaucoma", "merges": "\u0120gl aucoma", "count": 959, "type": "remove by frequency"} -{"id": 36566, "token": "\u0120isoform", "merges": "\u0120iso form", "count": 959, "type": "remove by frequency"} -{"id": 16203, "token": "\u0120myel", "merges": "\u0120my el", "count": 959, "type": "remove by frequency"} -{"id": 35790, "token": "\u00d7\u0135", "merges": "\u00d7 \u0135", "count": 959, "type": "remove by frequency"} -{"id": 38257, "token": "Populated", "merges": "Pop ulated", "count": 960, "type": "remove by frequency"} -{"id": 50109, "token": "\u0120punches", "merges": "\u0120pun ches", "count": 960, "type": "remove by frequency"} -{"id": 24611, "token": "\u0120Harvey", "merges": "\u0120Har vey", "count": 960, "type": "remove by frequency"} -{"id": 36380, "token": "\u0120Meth", "merges": "\u0120M eth", "count": 960, "type": "remove by frequency"} -{"id": 41903, "token": "\u0120Kai", "merges": "\u0120K ai", "count": 960, "type": "remove by frequency"} -{"id": 25269, "token": "\u0120\u00d7\u0136", "merges": "\u0120\u00d7 \u0136", "count": 960, "type": "remove by frequency"} -{"id": 45135, "token": "\"){", "merges": "\" ){", "count": 960, "type": "remove by frequency"} -{"id": 44925, "token": "\u0120Heroes", "merges": "\u0120Hero es", "count": 961, "type": "remove by frequency"} -{"id": 39330, "token": "\u0120characterizing", "merges": "\u0120character izing", "count": 962, "type": "remove by frequency"} -{"id": 45797, "token": "\u0120Dew", "merges": "\u0120D ew", "count": 962, "type": "remove by frequency"} -{"id": 47992, "token": "\u0120Trials", "merges": "\u0120Tri als", "count": 963, "type": "remove by frequency"} -{"id": 28093, "token": "\u0120Haven", "merges": "\u0120H aven", "count": 963, "type": "remove by frequency"} -{"id": 47808, "token": "\u0120HCl", "merges": "\u0120H Cl", "count": 963, "type": "remove by frequency"} -{"id": 22224, "token": "\u00e0\u00a5\u0122", "merges": "\u00e0\u00a5 \u0122", "count": 963, "type": "remove by frequency"} -{"id": 42254, "token": "\u0120misrepresentation", "merges": "\u0120misrepresent ation", "count": 964, "type": "remove by frequency"} -{"id": 12533, "token": "\u0120macroph", "merges": "\u0120mac roph", "count": 964, "type": "remove by frequency"} -{"id": 42885, "token": "\u0120diff\u00c3\u00a9", "merges": "\u0120diff \u00c3\u00a9", "count": 964, "type": "remove by frequency"} -{"id": 42722, "token": "\u0120sempre", "merges": "\u0120sem pre", "count": 964, "type": "remove by frequency"} -{"id": 13247, "token": "\u0120Korean", "merges": "\u0120Kore an", "count": 964, "type": "remove by frequency"} -{"id": 26066, "token": "\u0120Brooks", "merges": "\u0120Brook s", "count": 964, "type": "remove by frequency"} -{"id": 17946, "token": "\u00cf\u0123\u00ce\u00b1", "merges": "\u00cf\u0123 \u00ce\u00b1", "count": 964, "type": "remove by frequency"} -{"id": 41373, "token": "\u0120malpractice", "merges": "\u0120mal practice", "count": 965, "type": "remove by frequency"} -{"id": 49488, "token": "\u0120tossing", "merges": "\u0120toss ing", "count": 965, "type": "remove by frequency"} -{"id": 43181, "token": "\u0120patter", "merges": "\u0120pat ter", "count": 965, "type": "remove by frequency"} -{"id": 11042, "token": "\u0120Swed", "merges": "\u0120Sw ed", "count": 965, "type": "remove by frequency"} -{"id": 42139, "token": "\u0120anter", "merges": "\u0120an ter", "count": 966, "type": "remove by frequency"} -{"id": 47433, "token": "\u0120Dee", "merges": "\u0120De e", "count": 966, "type": "remove by frequency"} -{"id": 24303, "token": ",\\\\", "merges": ",\\ \\", "count": 966, "type": "remove by frequency"} -{"id": 31860, "token": "\u0120Ik", "merges": "\u0120I k", "count": 966, "type": "remove by frequency"} -{"id": 53535, "token": "\u00e8\u0126\u0129", "merges": "\u00e8\u0126 \u0129", "count": 966, "type": "remove by frequency"} -{"id": 17957, "token": "\u0120hydroph", "merges": "\u0120hydro ph", "count": 967, "type": "remove by frequency"} -{"id": 45681, "token": "\u0120Ludwig", "merges": "\u0120Lud wig", "count": 967, "type": "remove by frequency"} -{"id": 23599, "token": "\u0120vocals", "merges": "\u0120voc als", "count": 967, "type": "remove by frequency"} -{"id": 31948, "token": "rocyte", "merges": "rocy te", "count": 967, "type": "remove by frequency"} -{"id": 31976, "token": "\u0120Lith", "merges": "\u0120L ith", "count": 967, "type": "remove by frequency"} -{"id": 33320, "token": "\u00c3\u00b3g", "merges": "\u00c3\u00b3 g", "count": 967, "type": "remove by frequency"} -{"id": 37651, "token": "\u0120distinguishable", "merges": "\u0120distinguish able", "count": 968, "type": "remove by frequency"} -{"id": 42249, "token": "\u0120imagining", "merges": "\u0120imag ining", "count": 968, "type": "remove by frequency"} -{"id": 42141, "token": "\u0120Deutsch", "merges": "\u0120Deut sch", "count": 968, "type": "remove by frequency"} -{"id": 29589, "token": "blockList", "merges": "block List", "count": 969, "type": "remove by frequency"} -{"id": 40392, "token": "\u0120Bec", "merges": "\u0120B ec", "count": 969, "type": "remove by frequency"} -{"id": 21725, "token": "\u0120cleavage", "merges": "\u0120cle avage", "count": 970, "type": "remove by frequency"} -{"id": 33004, "token": "\u0120Freeman", "merges": "\u0120Fre eman", "count": 970, "type": "remove by frequency"} -{"id": 39146, "token": "\u00e1\u0125\u0136\u00e1\u0125", "merges": "\u00e1\u0125\u0136 \u00e1\u0125", "count": 970, "type": "remove by frequency"} -{"id": 39149, "token": "\u0120conventionally", "merges": "\u0120convention ally", "count": 971, "type": "remove by frequency"} -{"id": 41651, "token": "\u0120storytelling", "merges": "\u0120story telling", "count": 971, "type": "remove by frequency"} -{"id": 35228, "token": "\u0120PlayStation", "merges": "\u0120Play Station", "count": 971, "type": "remove by frequency"} -{"id": 31680, "token": "\u0120progenitor", "merges": "\u0120progen itor", "count": 971, "type": "remove by frequency"} -{"id": 34199, "token": "\u0120burglary", "merges": "\u0120burgl ary", "count": 971, "type": "remove by frequency"} -{"id": 50162, "token": "\u0120Fang", "merges": "\u0120F ang", "count": 971, "type": "remove by frequency"} -{"id": 46656, "token": "\u0120DRAM", "merges": "\u0120DR AM", "count": 971, "type": "remove by frequency"} -{"id": 36403, "token": "\u00d7\u0139", "merges": "\u00d7 \u0139", "count": 971, "type": "remove by frequency"} -{"id": 47933, "token": "\u0120homestead", "merges": "\u0120hom estead", "count": 972, "type": "remove by frequency"} -{"id": 30331, "token": "\u0120\u00d8\u00a7\u00d9\u0126\u00d9\u0127", "merges": "\u0120\u00d8\u00a7\u00d9\u0126 \u00d9\u0127", "count": 972, "type": "remove by frequency"} -{"id": 15940, "token": "\u0120Rick", "merges": "\u0120R ick", "count": 972, "type": "remove by frequency"} -{"id": 18481, "token": "\u00c3\u00a5r", "merges": "\u00c3\u00a5 r", "count": 972, "type": "remove by frequency"} -{"id": 31468, "token": "\u00d7\u00a2", "merges": "\u00d7 \u00a2", "count": 972, "type": "remove by frequency"} -{"id": 38744, "token": "\u0120mediating", "merges": "\u0120medi ating", "count": 973, "type": "remove by frequency"} -{"id": 27581, "token": "\u0120Gabriel", "merges": "\u0120Gab riel", "count": 973, "type": "remove by frequency"} -{"id": 47806, "token": "\u0120juxt", "merges": "\u0120ju xt", "count": 973, "type": "remove by frequency"} -{"id": 24800, "token": "|}", "merges": "| }", "count": 973, "type": "remove by frequency"} -{"id": 30772, "token": "\u0120splicing", "merges": "\u0120splic ing", "count": 974, "type": "remove by frequency"} -{"id": 27080, "token": "\u0120Daw", "merges": "\u0120D aw", "count": 974, "type": "remove by frequency"} -{"id": 47714, "token": "\u0120CTL", "merges": "\u0120CT L", "count": 974, "type": "remove by frequency"} -{"id": 11011, "token": "\u0120Ireland", "merges": "\u0120I reland", "count": 975, "type": "remove by frequency"} -{"id": 28178, "token": "\u0120Salv", "merges": "\u0120Sal v", "count": 975, "type": "remove by frequency"} -{"id": 31803, "token": "bben", "merges": "bb en", "count": 975, "type": "remove by frequency"} -{"id": 48916, "token": "\u0120Tao", "merges": "\u0120T ao", "count": 975, "type": "remove by frequency"} -{"id": 45838, "token": "\u0120Xin", "merges": "\u0120X in", "count": 975, "type": "remove by frequency"} -{"id": 9121, "token": "\u0120\\{", "merges": "\u0120\\ {", "count": 975, "type": "remove by frequency"} -{"id": 40717, "token": "\u0120Agricultural", "merges": "\u0120Agric ultural", "count": 976, "type": "remove by frequency"} -{"id": 48647, "token": "\u0120Bernstein", "merges": "\u0120Bern stein", "count": 976, "type": "remove by frequency"} -{"id": 29544, "token": "\u0120Emm", "merges": "\u0120E mm", "count": 976, "type": "remove by frequency"} -{"id": 35291, "token": "\u0120Aim", "merges": "\u0120A im", "count": 977, "type": "remove by frequency"} -{"id": 50351, "token": "\u00e2\u0137\u00af", "merges": "\u00e2\u0137 \u00af", "count": 977, "type": "remove by frequency"} -{"id": 19717, "token": "\u0120Christopher", "merges": "\u0120Christ opher", "count": 978, "type": "remove by frequency"} -{"id": 44871, "token": "\u0120palliative", "merges": "\u0120pall iative", "count": 978, "type": "remove by frequency"} -{"id": 42884, "token": "\u0120decid", "merges": "\u0120dec id", "count": 978, "type": "remove by frequency"} -{"id": 33216, "token": "\u0120Jazz", "merges": "\u0120J azz", "count": 978, "type": "remove by frequency"} -{"id": 51151, "token": "\u00e5\u013d\u012b", "merges": "\u00e5\u013d \u012b", "count": 978, "type": "remove by frequency"} -{"id": 36657, "token": "\u0120Coming", "merges": "\u0120Com ing", "count": 979, "type": "remove by frequency"} -{"id": 30312, "token": "\u0120corne", "merges": "\u0120cor ne", "count": 979, "type": "remove by frequency"} -{"id": 21178, "token": "\u0120Gonz", "merges": "\u0120G onz", "count": 979, "type": "remove by frequency"} -{"id": 21130, "token": "\u0120Andy", "merges": "\u0120And y", "count": 979, "type": "remove by frequency"} -{"id": 40350, "token": "\u0120rigidity", "merges": "\u0120rig idity", "count": 980, "type": "remove by frequency"} -{"id": 45956, "token": "\u0120exerts", "merges": "\u0120exert s", "count": 980, "type": "remove by frequency"} -{"id": 18088, "token": "\u0120Vegas", "merges": "\u0120Veg as", "count": 980, "type": "remove by frequency"} -{"id": 18975, "token": "\u0120Gary", "merges": "\u0120G ary", "count": 980, "type": "remove by frequency"} -{"id": 34986, "token": "\u0120waveguide", "merges": "\u0120wave guide", "count": 981, "type": "remove by frequency"} -{"id": 31084, "token": "\u0120pelvic", "merges": "\u0120pel vic", "count": 981, "type": "remove by frequency"} -{"id": 19172, "token": "\u0120Dean", "merges": "\u0120De an", "count": 981, "type": "remove by frequency"} -{"id": 26192, "token": "\u0120Baron", "merges": "\u0120Bar on", "count": 982, "type": "remove by frequency"} -{"id": 36112, "token": "\u0120caud", "merges": "\u0120c aud", "count": 982, "type": "remove by frequency"} -{"id": 35569, "token": "\u0120wenn", "merges": "\u0120w enn", "count": 982, "type": "remove by frequency"} -{"id": 36954, "token": "\u0120leuc", "merges": "\u0120le uc", "count": 982, "type": "remove by frequency"} -{"id": 33691, "token": "\u0120elongation", "merges": "\u0120elong ation", "count": 983, "type": "remove by frequency"} -{"id": 18360, "token": "\u0120affidavit", "merges": "\u0120affidav it", "count": 983, "type": "remove by frequency"} -{"id": 48048, "token": "\u0120rapper", "merges": "\u0120ra pper", "count": 983, "type": "remove by frequency"} -{"id": 36876, "token": "\u0120eso", "merges": "\u0120es o", "count": 983, "type": "remove by frequency"} -{"id": 33201, "token": "\u0120metallicity", "merges": "\u0120metall icity", "count": 984, "type": "remove by frequency"} -{"id": 17974, "token": "\u0120felony", "merges": "\u0120fel ony", "count": 984, "type": "remove by frequency"} -{"id": 49876, "token": "\u0120roared", "merges": "\u0120ro ared", "count": 984, "type": "remove by frequency"} -{"id": 20577, "token": "\u0120Craig", "merges": "\u0120Cra ig", "count": 984, "type": "remove by frequency"} -{"id": 39124, "token": "\u0120prejudiced", "merges": "\u0120prejud iced", "count": 985, "type": "remove by frequency"} -{"id": 22497, "token": "\u0120Norman", "merges": "\u0120Norm an", "count": 985, "type": "remove by frequency"} -{"id": 26907, "token": "\u00c3\u00bablic", "merges": "\u00c3\u00ba blic", "count": 985, "type": "remove by frequency"} -{"id": 31138, "token": "\u0120Paulo", "merges": "\u0120Paul o", "count": 985, "type": "remove by frequency"} -{"id": 24919, "token": "\u0120Shah", "merges": "\u0120Sh ah", "count": 985, "type": "remove by frequency"} -{"id": 38078, "token": "\u0120Increasing", "merges": "\u0120Incre asing", "count": 986, "type": "remove by frequency"} -{"id": 25458, "token": "\u0120Wallace", "merges": "\u0120Wall ace", "count": 986, "type": "remove by frequency"} -{"id": 21946, "token": "\u0120CBS", "merges": "\u0120C BS", "count": 986, "type": "remove by frequency"} -{"id": 34847, "token": "gesterone", "merges": "gester one", "count": 987, "type": "remove by frequency"} -{"id": 47362, "token": "\u0120jerked", "merges": "\u0120jerk ed", "count": 987, "type": "remove by frequency"} -{"id": 37165, "token": "\u0120embol", "merges": "\u0120emb ol", "count": 987, "type": "remove by frequency"} -{"id": 22579, "token": "\u0120Kirk", "merges": "\u0120K irk", "count": 987, "type": "remove by frequency"} -{"id": 49216, "token": "\u0120\u00c3\u00b3", "merges": "\u0120\u00c3 \u00b3", "count": 987, "type": "remove by frequency"} -{"id": 50132, "token": "\u0120Sensitivity", "merges": "\u0120S ensitivity", "count": 988, "type": "remove by frequency"} -{"id": 39620, "token": "\u0120condensate", "merges": "\u0120condens ate", "count": 988, "type": "remove by frequency"} -{"id": 35135, "token": "\u0120placent", "merges": "\u0120pl acent", "count": 988, "type": "remove by frequency"} -{"id": 32016, "token": "\u0120Faculty", "merges": "\u0120Fac ulty", "count": 988, "type": "remove by frequency"} -{"id": 45715, "token": "ieurs", "merges": "ie urs", "count": 988, "type": "remove by frequency"} -{"id": 15652, "token": "displaystyle", "merges": "display style", "count": 989, "type": "remove by frequency"} -{"id": 24955, "token": "\u0120Someone", "merges": "\u0120Some one", "count": 989, "type": "remove by frequency"} -{"id": 17794, "token": "\u0120Argent", "merges": "\u0120Ar gent", "count": 989, "type": "remove by frequency"} -{"id": 35814, "token": "\u0120Tum", "merges": "\u0120T um", "count": 989, "type": "remove by frequency"} -{"id": 39967, "token": "\u0120Dart", "merges": "\u0120D art", "count": 990, "type": "remove by frequency"} -{"id": 47819, "token": "\u0120whispers", "merges": "\u0120whis pers", "count": 991, "type": "remove by frequency"} -{"id": 41854, "token": "\u0120forearm", "merges": "\u0120fore arm", "count": 991, "type": "remove by frequency"} -{"id": 27261, "token": "\u0120Julia", "merges": "\u0120Jul ia", "count": 991, "type": "remove by frequency"} -{"id": 43060, "token": "\u0120Wit", "merges": "\u0120W it", "count": 991, "type": "remove by frequency"} -{"id": 35226, "token": "\u0120Programme", "merges": "\u0120Program me", "count": 992, "type": "remove by frequency"} -{"id": 28595, "token": "\u0120toner", "merges": "\u0120ton er", "count": 992, "type": "remove by frequency"} -{"id": 34946, "token": "\u0120Kumar", "merges": "\u0120K umar", "count": 992, "type": "remove by frequency"} -{"id": 15946, "token": "\u0120Pitt", "merges": "\u0120P itt", "count": 992, "type": "remove by frequency"} -{"id": 27749, "token": "\u0120peu", "merges": "\u0120pe u", "count": 992, "type": "remove by frequency"} -{"id": 50150, "token": "\u0120ICO", "merges": "\u0120I CO", "count": 992, "type": "remove by frequency"} -{"id": 50288, "token": "\u00cb\u013b", "merges": "\u00cb \u013b", "count": 992, "type": "remove by frequency"} -{"id": 42016, "token": "amethasone", "merges": "ameth asone", "count": 994, "type": "remove by frequency"} -{"id": 33289, "token": "\u0120rearrang", "merges": "\u0120rearr ang", "count": 994, "type": "remove by frequency"} -{"id": 31088, "token": "\u0120suffices", "merges": "\u0120suff ices", "count": 994, "type": "remove by frequency"} -{"id": 24360, "token": "\u0120endomet", "merges": "\u0120end omet", "count": 994, "type": "remove by frequency"} -{"id": 28828, "token": "\u0120innings", "merges": "\u0120inn ings", "count": 994, "type": "remove by frequency"} -{"id": 33755, "token": "\u0120intercourse", "merges": "\u0120inter course", "count": 995, "type": "remove by frequency"} -{"id": 40388, "token": "\u0120Giant", "merges": "\u0120G iant", "count": 995, "type": "remove by frequency"} -{"id": 28887, "token": "\u0120Tat", "merges": "\u0120T at", "count": 995, "type": "remove by frequency"} -{"id": 25583, "token": "\\|\\", "merges": "\\ |\\", "count": 995, "type": "remove by frequency"} -{"id": 33695, "token": "\u00e0\u00b5\u012f", "merges": "\u00e0\u00b5 \u012f", "count": 995, "type": "remove by frequency"} -{"id": 42337, "token": "\u0120Wellington", "merges": "\u0120W ellington", "count": 996, "type": "remove by frequency"} -{"id": 10586, "token": "\u0120Southern", "merges": "\u0120S outhern", "count": 996, "type": "remove by frequency"} -{"id": 39293, "token": "\u0120Chal", "merges": "\u0120Ch al", "count": 996, "type": "remove by frequency"} -{"id": 28642, "token": "\u0120Ana", "merges": "\u0120An a", "count": 996, "type": "remove by frequency"} -{"id": 48909, "token": "\u0120Wn", "merges": "\u0120W n", "count": 996, "type": "remove by frequency"} -{"id": 31247, "token": "\u0120Carr", "merges": "\u0120Car r", "count": 997, "type": "remove by frequency"} -{"id": 27478, "token": "\u0120ESPN", "merges": "\u0120ESP N", "count": 997, "type": "remove by frequency"} -{"id": 50358, "token": "\u00e2\u0138\u0129", "merges": "\u00e2\u0138 \u0129", "count": 997, "type": "remove by frequency"} -{"id": 48147, "token": "\u0120glancing", "merges": "\u0120gl ancing", "count": 998, "type": "remove by frequency"} -{"id": 30595, "token": "\u0120elast", "merges": "\u0120el ast", "count": 998, "type": "remove by frequency"} -{"id": 29761, "token": "\u0120Miles", "merges": "\u0120M iles", "count": 998, "type": "remove by frequency"} -{"id": 20627, "token": "\u0120Nash", "merges": "\u0120N ash", "count": 998, "type": "remove by frequency"} -{"id": 27344, "token": "\u0120\u00e2\u0123", "merges": "\u0120\u00e2 \u0123", "count": 998, "type": "remove by frequency"} -{"id": 45210, "token": "\u0120borderline", "merges": "\u0120border line", "count": 999, "type": "remove by frequency"} -{"id": 26958, "token": "\u0120Alexand", "merges": "\u0120Alex and", "count": 1000, "type": "remove by frequency"} -{"id": 49468, "token": "\u0120Galois", "merges": "\u0120Gal ois", "count": 1000, "type": "remove by frequency"} -{"id": 21969, "token": "\u0120m\u00c3\u00aame", "merges": "\u0120m \u00c3\u00aame", "count": 1000, "type": "remove by frequency"} -{"id": 25943, "token": "\u0120supplementation", "merges": "\u0120supplement ation", "count": 1001, "type": "remove by frequency"} -{"id": 49421, "token": "\u0120oscillating", "merges": "\u0120oscill ating", "count": 1001, "type": "remove by frequency"} -{"id": 46342, "token": "\u0120grilled", "merges": "\u0120gr illed", "count": 1001, "type": "remove by frequency"} -{"id": 42544, "token": "\u0120uterus", "merges": "\u0120uter us", "count": 1001, "type": "remove by frequency"} -{"id": 14733, "token": "\u00cf\u0123\u00ce\u00bf", "merges": "\u00cf\u0123 \u00ce\u00bf", "count": 1001, "type": "remove by frequency"} -{"id": 41852, "token": "\u0120Wen", "merges": "\u0120W en", "count": 1001, "type": "remove by frequency"} -{"id": 22490, "token": "itoneal", "merges": "itone al", "count": 1002, "type": "remove by frequency"} -{"id": 34799, "token": "ococcal", "merges": "ococ cal", "count": 1002, "type": "remove by frequency"} -{"id": 28294, "token": "\u0120monol", "merges": "\u0120mon ol", "count": 1002, "type": "remove by frequency"} -{"id": 38221, "token": "\u00e0\u00a6\u00b0", "merges": "\u00e0\u00a6 \u00b0", "count": 1002, "type": "remove by frequency"} -{"id": 31047, "token": "\u00d5\u00a1", "merges": "\u00d5 \u00a1", "count": 1002, "type": "remove by frequency"} -{"id": 32015, "token": "\u0120Partnership", "merges": "\u0120Part nership", "count": 1003, "type": "remove by frequency"} -{"id": 14386, "token": "\u0120toxicity", "merges": "\u0120tox icity", "count": 1003, "type": "remove by frequency"} -{"id": 40487, "token": "\u0120Mell", "merges": "\u0120M ell", "count": 1003, "type": "remove by frequency"} -{"id": 49480, "token": "\u0120Cors", "merges": "\u0120C ors", "count": 1004, "type": "remove by frequency"} -{"id": 27461, "token": "\u0120Lis", "merges": "\u0120L is", "count": 1004, "type": "remove by frequency"} -{"id": 42845, "token": "\u0120jot", "merges": "\u0120j ot", "count": 1004, "type": "remove by frequency"} -{"id": 43161, "token": "\u0120Improvement", "merges": "\u0120Improve ment", "count": 1005, "type": "remove by frequency"} -{"id": 43585, "token": "\u0120dispersal", "merges": "\u0120dispers al", "count": 1005, "type": "remove by frequency"} -{"id": 49006, "token": "\u0120nodding", "merges": "\u0120nod ding", "count": 1005, "type": "remove by frequency"} -{"id": 26993, "token": "\u0120Female", "merges": "\u0120F emale", "count": 1005, "type": "remove by frequency"} -{"id": 44052, "token": "\u0120Yoga", "merges": "\u0120Y oga", "count": 1005, "type": "remove by frequency"} -{"id": 36646, "token": "\u0120\u00c3\u00a7a", "merges": "\u0120\u00c3\u00a7 a", "count": 1005, "type": "remove by frequency"} -{"id": 35912, "token": "\u0120pylori", "merges": "\u0120pyl ori", "count": 1006, "type": "remove by frequency"} -{"id": 48129, "token": "\u0120hydration", "merges": "\u0120hyd ration", "count": 1007, "type": "remove by frequency"} -{"id": 45829, "token": "\u0120scall", "merges": "\u0120sc all", "count": 1007, "type": "remove by frequency"} -{"id": 27443, "token": "\u0120Ivan", "merges": "\u0120I van", "count": 1007, "type": "remove by frequency"} -{"id": 46477, "token": "\u0120g\u00c3\u00a9n\u00c3\u00a9", "merges": "\u0120g \u00c3\u00a9n\u00c3\u00a9", "count": 1008, "type": "remove by frequency"} -{"id": 26496, "token": "\u0120Woods", "merges": "\u0120Wood s", "count": 1008, "type": "remove by frequency"} -{"id": 27735, "token": "\u0120muit", "merges": "\u0120m uit", "count": 1008, "type": "remove by frequency"} -{"id": 46487, "token": "\u0120Tin", "merges": "\u0120T in", "count": 1008, "type": "remove by frequency"} -{"id": 43826, "token": "\u0120Edu", "merges": "\u0120E du", "count": 1008, "type": "remove by frequency"} -{"id": 49825, "token": "\u0120lifeless", "merges": "\u0120lif eless", "count": 1009, "type": "remove by frequency"} -{"id": 31390, "token": "opoietic", "merges": "op oietic", "count": 1009, "type": "remove by frequency"} -{"id": 33042, "token": "\u0120Canal", "merges": "\u0120Can al", "count": 1009, "type": "remove by frequency"} -{"id": 40752, "token": "\u0120stratification", "merges": "\u0120strat ification", "count": 1010, "type": "remove by frequency"} -{"id": 35234, "token": "\u0120r\u00c3\u00a9g", "merges": "\u0120r\u00c3\u00a9 g", "count": 1010, "type": "remove by frequency"} -{"id": 31188, "token": "\u0120Census", "merges": "\u0120C ensus", "count": 1011, "type": "remove by frequency"} -{"id": 38002, "token": "\u0120Glor", "merges": "\u0120G lor", "count": 1011, "type": "remove by frequency"} -{"id": 39329, "token": "\u0120Hiro", "merges": "\u0120H iro", "count": 1011, "type": "remove by frequency"} -{"id": 17757, "token": "\u00ce\u00bc\u00ce\u00b1", "merges": "\u00ce\u00bc \u00ce\u00b1", "count": 1011, "type": "remove by frequency"} -{"id": 28567, "token": "\u0120implanted", "merges": "\u0120impl anted", "count": 1012, "type": "remove by frequency"} -{"id": 48805, "token": "\u0120Bulletin", "merges": "\u0120Bullet in", "count": 1012, "type": "remove by frequency"} -{"id": 41505, "token": "\u00d0\u00be\u00d0\u00b2\u00d0\u00be\u00d1\u0122", "merges": "\u00d0\u00be\u00d0\u00b2 \u00d0\u00be\u00d1\u0122", "count": 1012, "type": "remove by frequency"} -{"id": 48661, "token": "\u0120microl", "merges": "\u0120mic rol", "count": 1012, "type": "remove by frequency"} -{"id": 35887, "token": "\u0120Beast", "merges": "\u0120Be ast", "count": 1012, "type": "remove by frequency"} -{"id": 26211, "token": "\u0120Sullivan", "merges": "\u0120S ullivan", "count": 1013, "type": "remove by frequency"} -{"id": 40251, "token": "chrane", "merges": "chr ane", "count": 1014, "type": "remove by frequency"} -{"id": 24569, "token": "\u0120Wells", "merges": "\u0120Well s", "count": 1014, "type": "remove by frequency"} -{"id": 27773, "token": "\u0120Blake", "merges": "\u0120Bl ake", "count": 1014, "type": "remove by frequency"} -{"id": 37179, "token": "\u0120suis", "merges": "\u0120su is", "count": 1014, "type": "remove by frequency"} -{"id": 38140, "token": "\u0120FDR", "merges": "\u0120F DR", "count": 1015, "type": "remove by frequency"} -{"id": 43968, "token": "\u0120Cyp", "merges": "\u0120C yp", "count": 1015, "type": "remove by frequency"} -{"id": 41561, "token": "\u0120Bac", "merges": "\u0120B ac", "count": 1015, "type": "remove by frequency"} -{"id": 14157, "token": ">::", "merges": "> ::", "count": 1015, "type": "remove by frequency"} -{"id": 41886, "token": "\u0120convergent", "merges": "\u0120conver gent", "count": 1016, "type": "remove by frequency"} -{"id": 32550, "token": "\u0120Prophet", "merges": "\u0120Prop het", "count": 1016, "type": "remove by frequency"} -{"id": 34825, "token": "\u0120Zeit", "merges": "\u0120Ze it", "count": 1016, "type": "remove by frequency"} -{"id": 14411, "token": ")}(", "merges": ") }(", "count": 1016, "type": "remove by frequency"} -{"id": 36674, "token": "\u0120subpoena", "merges": "\u0120subpo ena", "count": 1017, "type": "remove by frequency"} -{"id": 35151, "token": "\u0120Terr", "merges": "\u0120T err", "count": 1017, "type": "remove by frequency"} -{"id": 32064, "token": "\u0120Kend", "merges": "\u0120K end", "count": 1017, "type": "remove by frequency"} -{"id": 43765, "token": "\u0120ORF", "merges": "\u0120OR F", "count": 1017, "type": "remove by frequency"} -{"id": 43758, "token": "\u0120continuance", "merges": "\u0120contin uance", "count": 1018, "type": "remove by frequency"} -{"id": 25010, "token": "\u0120Eventually", "merges": "\u0120Event ually", "count": 1019, "type": "remove by frequency"} -{"id": 36456, "token": "\u0120pitching", "merges": "\u0120pitch ing", "count": 1019, "type": "remove by frequency"} -{"id": 38075, "token": "\u0120FIFA", "merges": "\u0120FIF A", "count": 1019, "type": "remove by frequency"} -{"id": 42340, "token": "\u0120Wow", "merges": "\u0120W ow", "count": 1019, "type": "remove by frequency"} -{"id": 40024, "token": "r\u00c3\u00a1", "merges": "r \u00c3\u00a1", "count": 1019, "type": "remove by frequency"} -{"id": 47532, "token": "\u00c3\u00a2te", "merges": "\u00c3\u00a2 te", "count": 1020, "type": "remove by frequency"} -{"id": 50349, "token": "\u00e2\u0137\u0143", "merges": "\u00e2\u0137 \u0143", "count": 1020, "type": "remove by frequency"} -{"id": 47224, "token": "\u0120umbilical", "merges": "\u0120umbil ical", "count": 1021, "type": "remove by frequency"} -{"id": 34211, "token": "\u0120Million", "merges": "\u0120M illion", "count": 1021, "type": "remove by frequency"} -{"id": 25882, "token": "\u0120stent", "merges": "\u0120st ent", "count": 1021, "type": "remove by frequency"} -{"id": 43125, "token": "\u0120Dipl", "merges": "\u0120Di pl", "count": 1021, "type": "remove by frequency"} -{"id": 43752, "token": "\u0120CNT", "merges": "\u0120C NT", "count": 1021, "type": "remove by frequency"} -{"id": 33345, "token": "\u0120importante", "merges": "\u0120important e", "count": 1022, "type": "remove by frequency"} -{"id": 43417, "token": "\u0120Telescope", "merges": "\u0120Telesc ope", "count": 1022, "type": "remove by frequency"} -{"id": 43633, "token": "\u0120Myth", "merges": "\u0120My th", "count": 1022, "type": "remove by frequency"} -{"id": 45473, "token": "teral", "merges": "ter al", "count": 1022, "type": "remove by frequency"} -{"id": 45811, "token": "\u0120narrator", "merges": "\u0120narr ator", "count": 1023, "type": "remove by frequency"} -{"id": 30789, "token": "\u0120Kap", "merges": "\u0120K ap", "count": 1023, "type": "remove by frequency"} -{"id": 38566, "token": "\u0120Electro", "merges": "\u0120Elect ro", "count": 1024, "type": "remove by frequency"} -{"id": 26737, "token": "\u0120Johann", "merges": "\u0120Joh ann", "count": 1024, "type": "remove by frequency"} -{"id": 37334, "token": "\u0120septic", "merges": "\u0120se ptic", "count": 1024, "type": "remove by frequency"} -{"id": 36210, "token": "\u0120Venus", "merges": "\u0120Ven us", "count": 1024, "type": "remove by frequency"} -{"id": 34604, "token": "\u0120potency", "merges": "\u0120pot ency", "count": 1025, "type": "remove by frequency"} -{"id": 23253, "token": "\u0120Holland", "merges": "\u0120Hol land", "count": 1025, "type": "remove by frequency"} -{"id": 23035, "token": "\u0120accret", "merges": "\u0120acc ret", "count": 1025, "type": "remove by frequency"} -{"id": 48020, "token": "\u0120Abuse", "merges": "\u0120Ab use", "count": 1025, "type": "remove by frequency"} -{"id": 42971, "token": "\u0120TODAY", "merges": "\u0120TOD AY", "count": 1025, "type": "remove by frequency"} -{"id": 20795, "token": "\u0120$,", "merges": "\u0120$ ,", "count": 1025, "type": "remove by frequency"} -{"id": 28848, "token": "\u0120Galactic", "merges": "\u0120Gal actic", "count": 1026, "type": "remove by frequency"} -{"id": 17330, "token": "\u0120Harvard", "merges": "\u0120Har vard", "count": 1026, "type": "remove by frequency"} -{"id": 24910, "token": "\u0120einer", "merges": "\u0120e iner", "count": 1026, "type": "remove by frequency"} -{"id": 47421, "token": "\u0120admittedly", "merges": "\u0120admitted ly", "count": 1027, "type": "remove by frequency"} -{"id": 30393, "token": "\u0120anisotropy", "merges": "\u0120anisot ropy", "count": 1027, "type": "remove by frequency"} -{"id": 17179, "token": "\u0120Franklin", "merges": "\u0120Frank lin", "count": 1027, "type": "remove by frequency"} -{"id": 48707, "token": "regulate", "merges": "reg ulate", "count": 1027, "type": "remove by frequency"} -{"id": 27432, "token": "\u0120einen", "merges": "\u0120ein en", "count": 1027, "type": "remove by frequency"} -{"id": 7718, "token": "cdots", "merges": "cd ots", "count": 1027, "type": "remove by frequency"} -{"id": 43844, "token": "\u0120Zend", "merges": "\u0120Z end", "count": 1027, "type": "remove by frequency"} -{"id": 45453, "token": "\u0120retrograde", "merges": "\u0120retro grade", "count": 1028, "type": "remove by frequency"} -{"id": 37849, "token": "\u0120\u00ce\u00a3", "merges": "\u0120\u00ce \u00a3", "count": 1028, "type": "remove by frequency"} -{"id": 20884, "token": "\u0120Military", "merges": "\u0120Mil itary", "count": 1029, "type": "remove by frequency"} -{"id": 37711, "token": "\u0120aliqu", "merges": "\u0120al iqu", "count": 1029, "type": "remove by frequency"} -{"id": 41481, "token": "\u0120Arms", "merges": "\u0120Ar ms", "count": 1029, "type": "remove by frequency"} -{"id": 29132, "token": "\u0120Rus", "merges": "\u0120R us", "count": 1029, "type": "remove by frequency"} -{"id": 25998, "token": "\u0120Apparently", "merges": "\u0120App arently", "count": 1030, "type": "remove by frequency"} -{"id": 46983, "token": "\u0120fantasies", "merges": "\u0120fantas ies", "count": 1030, "type": "remove by frequency"} -{"id": 32090, "token": ".\u00e2\u0122\u013b\u00e2\u0122\u013f", "merges": ".\u00e2\u0122\u013b \u00e2\u0122\u013f", "count": 1030, "type": "remove by frequency"} -{"id": 28810, "token": "\u0120Born", "merges": "\u0120B orn", "count": 1030, "type": "remove by frequency"} -{"id": 47054, "token": "\u00e3\u0125\u0141", "merges": "\u00e3\u0125 \u0141", "count": 1031, "type": "remove by frequency"} -{"id": 26327, "token": "\u0120Athlet", "merges": "\u0120Ath let", "count": 1032, "type": "remove by frequency"} -{"id": 45406, "token": "\u0120semif", "merges": "\u0120sem if", "count": 1032, "type": "remove by frequency"} -{"id": 41100, "token": "\u0120Chef", "merges": "\u0120Che f", "count": 1032, "type": "remove by frequency"} -{"id": 17583, "token": "\u00c2\u00b9", "merges": "\u00c2 \u00b9", "count": 1032, "type": "remove by frequency"} -{"id": 21564, "token": "\u0120eryth", "merges": "\u0120ery th", "count": 1033, "type": "remove by frequency"} -{"id": 31747, "token": "\u0120rheumat", "merges": "\u0120rhe umat", "count": 1034, "type": "remove by frequency"} -{"id": 42166, "token": "\u0120Singer", "merges": "\u0120S inger", "count": 1034, "type": "remove by frequency"} -{"id": 26503, "token": "\u0120Arabic", "merges": "\u0120Arab ic", "count": 1034, "type": "remove by frequency"} -{"id": 42107, "token": "\u0120Caf", "merges": "\u0120C af", "count": 1034, "type": "remove by frequency"} -{"id": 44649, "token": "\u00e3\u0125\u0135", "merges": "\u00e3\u0125 \u0135", "count": 1034, "type": "remove by frequency"} -{"id": 24661, "token": ")}_", "merges": ") }_", "count": 1034, "type": "remove by frequency"} -{"id": 29256, "token": "\u0120Og", "merges": "\u0120O g", "count": 1034, "type": "remove by frequency"} -{"id": 21910, "token": "\u0120Generally", "merges": "\u0120Gener ally", "count": 1035, "type": "remove by frequency"} -{"id": 42742, "token": "\u0120adorable", "merges": "\u0120ad orable", "count": 1035, "type": "remove by frequency"} -{"id": 34879, "token": "\u0120cerebro", "merges": "\u0120cere bro", "count": 1035, "type": "remove by frequency"} -{"id": 22992, "token": "\u0120Earl", "merges": "\u0120E arl", "count": 1035, "type": "remove by frequency"} -{"id": 16908, "token": "\u0120Dave", "merges": "\u0120D ave", "count": 1035, "type": "remove by frequency"} -{"id": 23184, "token": "\u0120Das", "merges": "\u0120D as", "count": 1037, "type": "remove by frequency"} -{"id": 35747, "token": "\u0120Bast", "merges": "\u0120B ast", "count": 1038, "type": "remove by frequency"} -{"id": 42724, "token": "\u0120Voy", "merges": "\u0120V oy", "count": 1038, "type": "remove by frequency"} -{"id": 48846, "token": "\u0120blockers", "merges": "\u0120block ers", "count": 1039, "type": "remove by frequency"} -{"id": 40308, "token": "\u0120indie", "merges": "\u0120ind ie", "count": 1039, "type": "remove by frequency"} -{"id": 42957, "token": "galact", "merges": "gal act", "count": 1039, "type": "remove by frequency"} -{"id": 14912, "token": "\u0120Diego", "merges": "\u0120Die go", "count": 1039, "type": "remove by frequency"} -{"id": 36769, "token": "astom", "merges": "ast om", "count": 1039, "type": "remove by frequency"} -{"id": 34098, "token": "\u0120Mathematical", "merges": "\u0120Mathemat ical", "count": 1040, "type": "remove by frequency"} -{"id": 45769, "token": "\u0120semen", "merges": "\u0120sem en", "count": 1040, "type": "remove by frequency"} -{"id": 5655, "token": "bolds", "merges": "b olds", "count": 1040, "type": "remove by frequency"} -{"id": 45397, "token": "\u0120Ivy", "merges": "\u0120I vy", "count": 1040, "type": "remove by frequency"} -{"id": 29009, "token": "\u0120circuitry", "merges": "\u0120circuit ry", "count": 1041, "type": "remove by frequency"} -{"id": 20903, "token": "\u0120sperm", "merges": "\u0120sper m", "count": 1041, "type": "remove by frequency"} -{"id": 43637, "token": "\u0120chuckled", "merges": "\u0120chuck led", "count": 1042, "type": "remove by frequency"} -{"id": 15360, "token": "\u0120Portug", "merges": "\u0120Port ug", "count": 1042, "type": "remove by frequency"} -{"id": 26979, "token": "\u0120pr\u00c3\u00a9s", "merges": "\u0120pr \u00c3\u00a9s", "count": 1042, "type": "remove by frequency"} -{"id": 34445, "token": "\u0120Bil", "merges": "\u0120B il", "count": 1042, "type": "remove by frequency"} -{"id": 39233, "token": "\u0120Era", "merges": "\u0120E ra", "count": 1042, "type": "remove by frequency"} -{"id": 21670, "token": "\u0120$_", "merges": "\u0120$ _", "count": 1042, "type": "remove by frequency"} -{"id": 42019, "token": "\u0120deleterious", "merges": "\u0120deleter ious", "count": 1043, "type": "remove by frequency"} -{"id": 23551, "token": "\u0120pathogenic", "merges": "\u0120path ogenic", "count": 1043, "type": "remove by frequency"} -{"id": 29915, "token": "\u0120Armstrong", "merges": "\u0120Arm strong", "count": 1043, "type": "remove by frequency"} -{"id": 32808, "token": "\u0120nitric", "merges": "\u0120nit ric", "count": 1043, "type": "remove by frequency"} -{"id": 27783, "token": "\u0120Barr", "merges": "\u0120B arr", "count": 1043, "type": "remove by frequency"} -{"id": 47342, "token": "\u0120eyebrow", "merges": "\u0120eyeb row", "count": 1044, "type": "remove by frequency"} -{"id": 7948, "token": "\u0120Tuesday", "merges": "\u0120T uesday", "count": 1044, "type": "remove by frequency"} -{"id": 30867, "token": "\u0120Fri", "merges": "\u0120F ri", "count": 1044, "type": "remove by frequency"} -{"id": 13069, "token": "\u00c5\u00a3", "merges": "\u00c5 \u00a3", "count": 1044, "type": "remove by frequency"} -{"id": 23032, "token": "\u0120Imperial", "merges": "\u0120Im perial", "count": 1045, "type": "remove by frequency"} -{"id": 42193, "token": "\u0120Fiction", "merges": "\u0120F iction", "count": 1045, "type": "remove by frequency"} -{"id": 46018, "token": "\u0120Commod", "merges": "\u0120Com mod", "count": 1045, "type": "remove by frequency"} -{"id": 32974, "token": "\u0120Devil", "merges": "\u0120Dev il", "count": 1045, "type": "remove by frequency"} -{"id": 36490, "token": "\u0120familial", "merges": "\u0120famil ial", "count": 1046, "type": "remove by frequency"} -{"id": 39783, "token": "\u0120screams", "merges": "\u0120sc reams", "count": 1046, "type": "remove by frequency"} -{"id": 30521, "token": "\u0120Mend", "merges": "\u0120M end", "count": 1046, "type": "remove by frequency"} -{"id": 38608, "token": "\u0120Diss", "merges": "\u0120D iss", "count": 1046, "type": "remove by frequency"} -{"id": 43158, "token": "\u0120Crew", "merges": "\u0120C rew", "count": 1046, "type": "remove by frequency"} -{"id": 7780, "token": "}}{\\", "merges": "}} {\\", "count": 1046, "type": "remove by frequency"} -{"id": 47809, "token": "=================================", "merges": "================================ =", "count": 1047, "type": "remove by frequency"} -{"id": 45264, "token": "\u0120arousal", "merges": "\u0120ar ousal", "count": 1047, "type": "remove by frequency"} -{"id": 44306, "token": "))/((", "merges": "))/ ((", "count": 1047, "type": "remove by frequency"} -{"id": 42216, "token": "\u0120Gob", "merges": "\u0120G ob", "count": 1047, "type": "remove by frequency"} -{"id": 36389, "token": "\u0120IJ", "merges": "\u0120I J", "count": 1047, "type": "remove by frequency"} -{"id": 41176, "token": "\u0120rhythms", "merges": "\u0120rhyth ms", "count": 1048, "type": "remove by frequency"} -{"id": 44576, "token": "\u0120\u00d0\u00b3\u00d0\u00be\u00d0\u00b4", "merges": "\u0120\u00d0\u00b3 \u00d0\u00be\u00d0\u00b4", "count": 1048, "type": "remove by frequency"} -{"id": 17384, "token": "ohist", "merges": "oh ist", "count": 1049, "type": "remove by frequency"} -{"id": 29024, "token": "\u0120Trek", "merges": "\u0120Tre k", "count": 1049, "type": "remove by frequency"} -{"id": 20058, "token": "\u0120Together", "merges": "\u0120T ogether", "count": 1050, "type": "remove by frequency"} -{"id": 50015, "token": "\u0120deceptive", "merges": "\u0120de ceptive", "count": 1051, "type": "remove by frequency"} -{"id": 26738, "token": "\u0120knockout", "merges": "\u0120knock out", "count": 1051, "type": "remove by frequency"} -{"id": 49515, "token": "\u0120tangled", "merges": "\u0120tang led", "count": 1051, "type": "remove by frequency"} -{"id": 33555, "token": "\u0120pretreatment", "merges": "\u0120pret reatment", "count": 1052, "type": "remove by frequency"} -{"id": 49843, "token": "\u0120holographic", "merges": "\u0120holog raphic", "count": 1052, "type": "remove by frequency"} -{"id": 33821, "token": "\u0120Lauren", "merges": "\u0120Lau ren", "count": 1052, "type": "remove by frequency"} -{"id": 12066, "token": "\u0120Viet", "merges": "\u0120V iet", "count": 1052, "type": "remove by frequency"} -{"id": 46317, "token": "\u0120Alto", "merges": "\u0120Al to", "count": 1052, "type": "remove by frequency"} -{"id": 37379, "token": "\u0120contre", "merges": "\u0120cont re", "count": 1053, "type": "remove by frequency"} -{"id": 18765, "token": "Though", "merges": "Th ough", "count": 1053, "type": "remove by frequency"} -{"id": 19962, "token": "\u0120Swiss", "merges": "\u0120Sw iss", "count": 1053, "type": "remove by frequency"} -{"id": 21442, "token": "polymers", "merges": "pol ymers", "count": 1054, "type": "remove by frequency"} -{"id": 17648, "token": "\u00cf\u0126\u00ce\u00b7", "merges": "\u00cf\u0126 \u00ce\u00b7", "count": 1054, "type": "remove by frequency"} -{"id": 43991, "token": "datepicker", "merges": "date picker", "count": 1055, "type": "remove by frequency"} -{"id": 15420, "token": "\u0120Wales", "merges": "\u0120W ales", "count": 1055, "type": "remove by frequency"} -{"id": 39778, "token": "\u0120conjugation", "merges": "\u0120conjug ation", "count": 1056, "type": "remove by frequency"} -{"id": 31354, "token": "ningen", "merges": "ning en", "count": 1056, "type": "remove by frequency"} -{"id": 48023, "token": "\u0120\"$(", "merges": "\u0120\" $(", "count": 1056, "type": "remove by frequency"} -{"id": 42065, "token": "\u0120Electrical", "merges": "\u0120Elect rical", "count": 1057, "type": "remove by frequency"} -{"id": 36448, "token": "\u0120causation", "merges": "\u0120caus ation", "count": 1057, "type": "remove by frequency"} -{"id": 26420, "token": "\u0120solub", "merges": "\u0120sol ub", "count": 1057, "type": "remove by frequency"} -{"id": 14901, "token": "\u0120Turk", "merges": "\u0120Tur k", "count": 1057, "type": "remove by frequency"} -{"id": 21664, "token": "\u0120Maur", "merges": "\u0120M aur", "count": 1057, "type": "remove by frequency"} -{"id": 41884, "token": "\u0120Wii", "merges": "\u0120W ii", "count": 1057, "type": "remove by frequency"} -{"id": 50173, "token": "ocardial", "merges": "ocard ial", "count": 1058, "type": "remove by frequency"} -{"id": 30330, "token": "\u0120Blues", "merges": "\u0120Bl ues", "count": 1058, "type": "remove by frequency"} -{"id": 24263, "token": "\u0120Penn", "merges": "\u0120P enn", "count": 1058, "type": "remove by frequency"} -{"id": 41540, "token": "\u0120Mang", "merges": "\u0120M ang", "count": 1058, "type": "remove by frequency"} -{"id": 47964, "token": "\u0120extremities", "merges": "\u0120extrem ities", "count": 1059, "type": "remove by frequency"} -{"id": 47233, "token": "\u0120conical", "merges": "\u0120con ical", "count": 1059, "type": "remove by frequency"} -{"id": 40254, "token": "\u0120Corporate", "merges": "\u0120Corpor ate", "count": 1060, "type": "remove by frequency"} -{"id": 49544, "token": "\u0120Feng", "merges": "\u0120F eng", "count": 1060, "type": "remove by frequency"} -{"id": 17317, "token": "\u0120\u00c3\u012b", "merges": "\u0120\u00c3 \u012b", "count": 1060, "type": "remove by frequency"} -{"id": 16254, "token": "\u0120placebo", "merges": "\u0120place bo", "count": 1061, "type": "remove by frequency"} -{"id": 27874, "token": "\u0120Remark", "merges": "\u0120Rem ark", "count": 1061, "type": "remove by frequency"} -{"id": 44485, "token": "\u0120Rent", "merges": "\u0120R ent", "count": 1061, "type": "remove by frequency"} -{"id": 38632, "token": "ubicin", "merges": "ub icin", "count": 1062, "type": "remove by frequency"} -{"id": 41033, "token": "\u0120suppresses", "merges": "\u0120suppress es", "count": 1063, "type": "remove by frequency"} -{"id": 16650, "token": "\u0120Fourth", "merges": "\u0120Four th", "count": 1063, "type": "remove by frequency"} -{"id": 50051, "token": "\u0120Yuan", "merges": "\u0120Y uan", "count": 1063, "type": "remove by frequency"} -{"id": 35127, "token": "\u00c3\u00b6m", "merges": "\u00c3\u00b6 m", "count": 1063, "type": "remove by frequency"} -{"id": 40889, "token": "\u0120asshole", "merges": "\u0120ass hole", "count": 1065, "type": "remove by frequency"} -{"id": 10044, "token": "\u0120\u00c3\u00ae", "merges": "\u0120\u00c3 \u00ae", "count": 1065, "type": "remove by frequency"} -{"id": 46520, "token": "\u0120\u00c5\u013d", "merges": "\u0120\u00c5 \u013d", "count": 1065, "type": "remove by frequency"} -{"id": 535, "token": "\u010a\u010a", "merges": "\u010a \u010a", "count": 1065, "type": "remove by frequency"} -{"id": 43173, "token": "\u0120calves", "merges": "\u0120cal ves", "count": 1066, "type": "remove by frequency"} -{"id": 24942, "token": "\u0120Foster", "merges": "\u0120F oster", "count": 1066, "type": "remove by frequency"} -{"id": 46121, "token": "GRect", "merges": "GR ect", "count": 1066, "type": "remove by frequency"} -{"id": 22787, "token": "\u0120Carm", "merges": "\u0120C arm", "count": 1066, "type": "remove by frequency"} -{"id": 6827, "token": "\u0120Mrs", "merges": "\u0120Mr s", "count": 1066, "type": "remove by frequency"} -{"id": 22024, "token": "\u0120Shakespeare", "merges": "\u0120Sh akespeare", "count": 1067, "type": "remove by frequency"} -{"id": 36046, "token": "\u0120disordered", "merges": "\u0120dis ordered", "count": 1067, "type": "remove by frequency"} -{"id": 40706, "token": "\u0120Telesc", "merges": "\u0120T elesc", "count": 1067, "type": "remove by frequency"} -{"id": 32612, "token": "c\u00c3\u00a9", "merges": "c \u00c3\u00a9", "count": 1067, "type": "remove by frequency"} -{"id": 30297, "token": "\u0120edema", "merges": "\u0120ed ema", "count": 1068, "type": "remove by frequency"} -{"id": 32203, "token": "\u0120Lor", "merges": "\u0120L or", "count": 1068, "type": "remove by frequency"} -{"id": 44730, "token": "\u0120skepticism", "merges": "\u0120skeptic ism", "count": 1069, "type": "remove by frequency"} -{"id": 46444, "token": "\u0120Ranger", "merges": "\u0120R anger", "count": 1069, "type": "remove by frequency"} -{"id": 28589, "token": "\u0120Lynch", "merges": "\u0120Lyn ch", "count": 1069, "type": "remove by frequency"} -{"id": 28547, "token": "\u0120noch", "merges": "\u0120no ch", "count": 1069, "type": "remove by frequency"} -{"id": 48271, "token": "\u0120Dip", "merges": "\u0120D ip", "count": 1069, "type": "remove by frequency"} -{"id": 12464, "token": "\u0120Lett", "merges": "\u0120Let t", "count": 1070, "type": "remove by frequency"} -{"id": 27470, "token": "\u0120FCC", "merges": "\u0120F CC", "count": 1070, "type": "remove by frequency"} -{"id": 27456, "token": "\u0120Nuclear", "merges": "\u0120N uclear", "count": 1071, "type": "remove by frequency"} -{"id": 41362, "token": "\u0120debuted", "merges": "\u0120debut ed", "count": 1071, "type": "remove by frequency"} -{"id": 49145, "token": "\u0120coy", "merges": "\u0120co y", "count": 1071, "type": "remove by frequency"} -{"id": 45486, "token": "chrotron", "merges": "chro tron", "count": 1072, "type": "remove by frequency"} -{"id": 47409, "token": "\u0120CAB", "merges": "\u0120C AB", "count": 1072, "type": "remove by frequency"} -{"id": 30850, "token": "oietic", "merges": "oi etic", "count": 1073, "type": "remove by frequency"} -{"id": 24395, "token": "\u0120Lloyd", "merges": "\u0120Ll oyd", "count": 1073, "type": "remove by frequency"} -{"id": 27917, "token": "\u0120Chase", "merges": "\u0120Ch ase", "count": 1073, "type": "remove by frequency"} -{"id": 21710, "token": "\u0120Clay", "merges": "\u0120Cl ay", "count": 1073, "type": "remove by frequency"} -{"id": 39308, "token": "\u0120Crisis", "merges": "\u0120Cris is", "count": 1074, "type": "remove by frequency"} -{"id": 14439, "token": "\u00cf\u0126\u00ce\u00b1", "merges": "\u00cf\u0126 \u00ce\u00b1", "count": 1074, "type": "remove by frequency"} -{"id": 49788, "token": "\u00d9\u012a\u00d8\u00af", "merges": "\u00d9\u012a \u00d8\u00af", "count": 1074, "type": "remove by frequency"} -{"id": 49373, "token": "\u0120manipulations", "merges": "\u0120manip ulations", "count": 1075, "type": "remove by frequency"} -{"id": 47292, "token": "Alright", "merges": "Al right", "count": 1075, "type": "remove by frequency"} -{"id": 38797, "token": "\u0120Planck", "merges": "\u0120Plan ck", "count": 1075, "type": "remove by frequency"} -{"id": 27286, "token": "\u0120Powell", "merges": "\u0120Pow ell", "count": 1075, "type": "remove by frequency"} -{"id": 38047, "token": "\u0120postnatal", "merges": "\u0120post natal", "count": 1076, "type": "remove by frequency"} -{"id": 45960, "token": "\u0120hardcore", "merges": "\u0120hard core", "count": 1076, "type": "remove by frequency"} -{"id": 8118, "token": "\u0120African", "merges": "\u0120Afric an", "count": 1076, "type": "remove by frequency"} -{"id": 41476, "token": "ovial", "merges": "ov ial", "count": 1076, "type": "remove by frequency"} -{"id": 7628, "token": "}=\\", "merges": "} =\\", "count": 1077, "type": "remove by frequency"} -{"id": 52364, "token": "\u00e6\u00a8\u0133", "merges": "\u00e6\u00a8 \u0133", "count": 1077, "type": "remove by frequency"} -{"id": 13355, "token": "\u0120$.", "merges": "\u0120$ .", "count": 1077, "type": "remove by frequency"} -{"id": 27811, "token": "\u0120Commercial", "merges": "\u0120Com mercial", "count": 1078, "type": "remove by frequency"} -{"id": 23325, "token": "\u0120Crime", "merges": "\u0120Cr ime", "count": 1078, "type": "remove by frequency"} -{"id": 46399, "token": "\u0120sectional", "merges": "\u0120section al", "count": 1079, "type": "remove by frequency"} -{"id": 32286, "token": "\u0120Theater", "merges": "\u0120The ater", "count": 1079, "type": "remove by frequency"} -{"id": 30066, "token": "\u0120SARS", "merges": "\u0120S ARS", "count": 1079, "type": "remove by frequency"} -{"id": 41659, "token": "\u0120Ply", "merges": "\u0120P ly", "count": 1079, "type": "remove by frequency"} -{"id": 35281, "token": "\u0120bullshit", "merges": "\u0120bull shit", "count": 1080, "type": "remove by frequency"} -{"id": 39398, "token": "\u0120Belt", "merges": "\u0120B elt", "count": 1080, "type": "remove by frequency"} -{"id": 44341, "token": "\u0120MDR", "merges": "\u0120M DR", "count": 1080, "type": "remove by frequency"} -{"id": 47268, "token": "\u0120chicks", "merges": "\u0120ch icks", "count": 1081, "type": "remove by frequency"} -{"id": 48428, "token": "\u0120trat", "merges": "\u0120tr at", "count": 1081, "type": "remove by frequency"} -{"id": 34518, "token": "\u0120FBS", "merges": "\u0120F BS", "count": 1081, "type": "remove by frequency"} -{"id": 36716, "token": "\u0120cirrhosis", "merges": "\u0120cirrh osis", "count": 1082, "type": "remove by frequency"} -{"id": 31172, "token": "\u0120elongated", "merges": "\u0120elong ated", "count": 1082, "type": "remove by frequency"} -{"id": 37464, "token": "\u0120morphine", "merges": "\u0120morph ine", "count": 1083, "type": "remove by frequency"} -{"id": 31541, "token": "\u0120Chicken", "merges": "\u0120Ch icken", "count": 1083, "type": "remove by frequency"} -{"id": 48884, "token": "\u0120Cruise", "merges": "\u0120Cru ise", "count": 1083, "type": "remove by frequency"} -{"id": 33932, "token": "\u0120Rust", "merges": "\u0120R ust", "count": 1083, "type": "remove by frequency"} -{"id": 48877, "token": "\u0120Lal", "merges": "\u0120L al", "count": 1083, "type": "remove by frequency"} -{"id": 4820, "token": "operatorname", "merges": "operator name", "count": 1084, "type": "remove by frequency"} -{"id": 41111, "token": "\u0120frivolous", "merges": "\u0120f rivolous", "count": 1084, "type": "remove by frequency"} -{"id": 34835, "token": "\u0120lineages", "merges": "\u0120line ages", "count": 1084, "type": "remove by frequency"} -{"id": 27320, "token": "\u0120nucleic", "merges": "\u0120nucle ic", "count": 1084, "type": "remove by frequency"} -{"id": 39131, "token": "\u0120Gott", "merges": "\u0120G ott", "count": 1084, "type": "remove by frequency"} -{"id": 45724, "token": "\u0120Pole", "merges": "\u0120P ole", "count": 1084, "type": "remove by frequency"} -{"id": 16874, "token": "\u0120Cruz", "merges": "\u0120Cru z", "count": 1084, "type": "remove by frequency"} -{"id": 34120, "token": "\u0120Ein", "merges": "\u0120E in", "count": 1084, "type": "remove by frequency"} -{"id": 25769, "token": "\u0120\u00c2\u00bf", "merges": "\u0120\u00c2 \u00bf", "count": 1084, "type": "remove by frequency"} -{"id": 40860, "token": "\u0120midfield", "merges": "\u0120mid field", "count": 1085, "type": "remove by frequency"} -{"id": 25968, "token": "\u0120Butler", "merges": "\u0120But ler", "count": 1085, "type": "remove by frequency"} -{"id": 43472, "token": "Prefab", "merges": "Pref ab", "count": 1085, "type": "remove by frequency"} -{"id": 24103, "token": "\u0120Fitz", "merges": "\u0120F itz", "count": 1085, "type": "remove by frequency"} -{"id": 24986, "token": "\u0120Kos", "merges": "\u0120K os", "count": 1085, "type": "remove by frequency"} -{"id": 37864, "token": "\u0120ligament", "merges": "\u0120lig ament", "count": 1086, "type": "remove by frequency"} -{"id": 23423, "token": "\u0120follic", "merges": "\u0120fol lic", "count": 1086, "type": "remove by frequency"} -{"id": 50019, "token": "iella", "merges": "i ella", "count": 1086, "type": "remove by frequency"} -{"id": 45967, "token": "\u0120hemp", "merges": "\u0120he mp", "count": 1086, "type": "remove by frequency"} -{"id": 37901, "token": "\u0120Nem", "merges": "\u0120N em", "count": 1086, "type": "remove by frequency"} -{"id": 49058, "token": "\u0120annoyance", "merges": "\u0120annoy ance", "count": 1087, "type": "remove by frequency"} -{"id": 21836, "token": "\u0120Whatever", "merges": "\u0120Wh atever", "count": 1087, "type": "remove by frequency"} -{"id": 42109, "token": "\u0120fists", "merges": "\u0120f ists", "count": 1087, "type": "remove by frequency"} -{"id": 13668, "token": "$^{-", "merges": "$ ^{-", "count": 1087, "type": "remove by frequency"} -{"id": 42509, "token": "\u0120shootings", "merges": "\u0120shoot ings", "count": 1088, "type": "remove by frequency"} -{"id": 19621, "token": "\u0120\u00d8\u00a7\u00d9\u0126\u00d8", "merges": "\u0120\u00d8\u00a7\u00d9\u0126 \u00d8", "count": 1088, "type": "remove by frequency"} -{"id": 8842, "token": "\u0120Cath", "merges": "\u0120C ath", "count": 1088, "type": "remove by frequency"} -{"id": 32780, "token": "\u0120PEG", "merges": "\u0120PE G", "count": 1088, "type": "remove by frequency"} -{"id": 27221, "token": "\u0120decays", "merges": "\u0120dec ays", "count": 1089, "type": "remove by frequency"} -{"id": 23900, "token": "\u0120Kre", "merges": "\u0120K re", "count": 1089, "type": "remove by frequency"} -{"id": 17186, "token": "\u00cf\u0125\u00ce\u00b7", "merges": "\u00cf\u0125 \u00ce\u00b7", "count": 1090, "type": "remove by frequency"} -{"id": 26117, "token": "\u0120Municip", "merges": "\u0120M unicip", "count": 1091, "type": "remove by frequency"} -{"id": 19046, "token": "\u0120Benjamin", "merges": "\u0120Ben jamin", "count": 1092, "type": "remove by frequency"} -{"id": 35003, "token": "\u0120peeled", "merges": "\u0120pe eled", "count": 1092, "type": "remove by frequency"} -{"id": 34130, "token": "\u0120ABOUT", "merges": "\u0120AB OUT", "count": 1092, "type": "remove by frequency"} -{"id": 37838, "token": "\u0120Frost", "merges": "\u0120F rost", "count": 1092, "type": "remove by frequency"} -{"id": 18117, "token": "\u0120dass", "merges": "\u0120d ass", "count": 1092, "type": "remove by frequency"} -{"id": 32580, "token": "\u0120Ges", "merges": "\u0120G es", "count": 1092, "type": "remove by frequency"} -{"id": 34341, "token": "\u0120\u00d8\u00b3", "merges": "\u0120\u00d8 \u00b3", "count": 1092, "type": "remove by frequency"} -{"id": 34663, "token": "\u0120chemically", "merges": "\u0120chem ically", "count": 1093, "type": "remove by frequency"} -{"id": 44317, "token": "\u0120dehydr", "merges": "\u0120de hydr", "count": 1093, "type": "remove by frequency"} -{"id": 16404, "token": "\u0120Brief", "merges": "\u0120B rief", "count": 1093, "type": "remove by frequency"} -{"id": 39984, "token": "\u00e3\u0123\u013f\u00e3\u0124\u012e", "merges": "\u00e3\u0123\u013f \u00e3\u0124\u012e", "count": 1093, "type": "remove by frequency"} -{"id": 41966, "token": "\u0120Refs", "merges": "\u0120Ref s", "count": 1093, "type": "remove by frequency"} -{"id": 43512, "token": "\u0120NOTICE", "merges": "\u0120NOT ICE", "count": 1094, "type": "remove by frequency"} -{"id": 39508, "token": "\u0120undergoes", "merges": "\u0120undergo es", "count": 1095, "type": "remove by frequency"} -{"id": 31821, "token": "erably", "merges": "er ably", "count": 1095, "type": "remove by frequency"} -{"id": 39692, "token": "\u00e3\u0124\u012b\u00e3\u0124\u012e", "merges": "\u00e3\u0124\u012b \u00e3\u0124\u012e", "count": 1095, "type": "remove by frequency"} -{"id": 35187, "token": "\u0120scint", "merges": "\u0120sc int", "count": 1095, "type": "remove by frequency"} -{"id": 27972, "token": "\u0120correlates", "merges": "\u0120correl ates", "count": 1096, "type": "remove by frequency"} -{"id": 38734, "token": "\u0120Friedman", "merges": "\u0120Fried man", "count": 1096, "type": "remove by frequency"} -{"id": 29411, "token": "\u0120etching", "merges": "\u0120et ching", "count": 1096, "type": "remove by frequency"} -{"id": 36313, "token": "\u00e3\u0123\u0126\u00e3\u0123\u00a6", "merges": "\u00e3\u0123\u0126 \u00e3\u0123\u00a6", "count": 1096, "type": "remove by frequency"} -{"id": 44241, "token": "=$(", "merges": "= $(", "count": 1096, "type": "remove by frequency"} -{"id": 49298, "token": "\u0120percussion", "merges": "\u0120perc ussion", "count": 1097, "type": "remove by frequency"} -{"id": 44061, "token": "\u0120flic", "merges": "\u0120f lic", "count": 1097, "type": "remove by frequency"} -{"id": 32940, "token": "conjugated", "merges": "conjug ated", "count": 1098, "type": "remove by frequency"} -{"id": 17138, "token": "\u0120Appendix", "merges": "\u0120App endix", "count": 1098, "type": "remove by frequency"} -{"id": 41921, "token": "\u0120Eu", "merges": "\u0120E u", "count": 1098, "type": "remove by frequency"} -{"id": 42063, "token": "\u00e0\u00a7\u012f", "merges": "\u00e0\u00a7 \u012f", "count": 1098, "type": "remove by frequency"} -{"id": 34640, "token": "monella", "merges": "mon ella", "count": 1099, "type": "remove by frequency"} -{"id": 28182, "token": "\u0120Papers", "merges": "\u0120P apers", "count": 1099, "type": "remove by frequency"} -{"id": 41272, "token": "atosis", "merges": "at osis", "count": 1099, "type": "remove by frequency"} -{"id": 21122, "token": "\u0120Jerry", "merges": "\u0120Jer ry", "count": 1099, "type": "remove by frequency"} -{"id": 13637, "token": "\u0120cultured", "merges": "\u0120cult ured", "count": 1100, "type": "remove by frequency"} -{"id": 34294, "token": "\u0120Therapy", "merges": "\u0120The rapy", "count": 1100, "type": "remove by frequency"} -{"id": 49781, "token": "\u0120glances", "merges": "\u0120gl ances", "count": 1100, "type": "remove by frequency"} -{"id": 13936, "token": "\u0120Holy", "merges": "\u0120Hol y", "count": 1100, "type": "remove by frequency"} -{"id": 14913, "token": "\u0120Duke", "merges": "\u0120Du ke", "count": 1100, "type": "remove by frequency"} -{"id": 45140, "token": "\u0120puncture", "merges": "\u0120punct ure", "count": 1101, "type": "remove by frequency"} -{"id": 49355, "token": "\u0120commentator", "merges": "\u0120comment ator", "count": 1103, "type": "remove by frequency"} -{"id": 39664, "token": "\u0120Instruments", "merges": "\u0120Instr uments", "count": 1103, "type": "remove by frequency"} -{"id": 12746, "token": "\u0120Columbia", "merges": "\u0120Columb ia", "count": 1103, "type": "remove by frequency"} -{"id": 27464, "token": "\u0120Revenue", "merges": "\u0120Rev enue", "count": 1103, "type": "remove by frequency"} -{"id": 47781, "token": "\u0120blasted", "merges": "\u0120bl asted", "count": 1103, "type": "remove by frequency"} -{"id": 39375, "token": "\u0120microp", "merges": "\u0120mic rop", "count": 1103, "type": "remove by frequency"} -{"id": 42712, "token": "\u0120Truck", "merges": "\u0120Tru ck", "count": 1103, "type": "remove by frequency"} -{"id": 30014, "token": "\u0120Berry", "merges": "\u0120Ber ry", "count": 1103, "type": "remove by frequency"} -{"id": 49556, "token": "\u0120clon", "merges": "\u0120cl on", "count": 1103, "type": "remove by frequency"} -{"id": 36652, "token": "\u0120Lum", "merges": "\u0120L um", "count": 1103, "type": "remove by frequency"} -{"id": 32112, "token": "\u0120elicited", "merges": "\u0120elic ited", "count": 1104, "type": "remove by frequency"} -{"id": 39814, "token": "\u0120nella", "merges": "\u0120n ella", "count": 1104, "type": "remove by frequency"} -{"id": 48968, "token": "\u0120Wien", "merges": "\u0120W ien", "count": 1104, "type": "remove by frequency"} -{"id": 31750, "token": "\u0120\u00cf\u0128", "merges": "\u0120 \u00cf\u0128", "count": 1104, "type": "remove by frequency"} -{"id": 45476, "token": "\u00d1\u0123\u00d1\u0124\u00d0\u00b2\u00d0\u00b5\u00d0\u00bd", "merges": "\u00d1\u0123\u00d1\u0124\u00d0\u00b2 \u00d0\u00b5\u00d0\u00bd", "count": 1105, "type": "remove by frequency"} -{"id": 32705, "token": "\u0120Companies", "merges": "\u0120Compan ies", "count": 1105, "type": "remove by frequency"} -{"id": 19187, "token": "\u0120resection", "merges": "\u0120rese ction", "count": 1105, "type": "remove by frequency"} -{"id": 39184, "token": "\u0120Genesis", "merges": "\u0120Gen esis", "count": 1105, "type": "remove by frequency"} -{"id": 47493, "token": "\u0120Introdu", "merges": "\u0120Int rodu", "count": 1105, "type": "remove by frequency"} -{"id": 39059, "token": "opropyl", "merges": "oprop yl", "count": 1105, "type": "remove by frequency"} -{"id": 32419, "token": "uitary", "merges": "uit ary", "count": 1105, "type": "remove by frequency"} -{"id": 26079, "token": "\u0120Bund", "merges": "\u0120B und", "count": 1105, "type": "remove by frequency"} -{"id": 23845, "token": "\u0120Diet", "merges": "\u0120D iet", "count": 1105, "type": "remove by frequency"} -{"id": 43778, "token": "\u0120comedian", "merges": "\u0120comed ian", "count": 1106, "type": "remove by frequency"} -{"id": 26165, "token": "\u0120Bh", "merges": "\u0120B h", "count": 1106, "type": "remove by frequency"} -{"id": 41166, "token": "\u0120Employees", "merges": "\u0120Employ ees", "count": 1107, "type": "remove by frequency"} -{"id": 23091, "token": "\u0120Princess", "merges": "\u0120Pr incess", "count": 1107, "type": "remove by frequency"} -{"id": 36969, "token": "\u0120Vacc", "merges": "\u0120V acc", "count": 1107, "type": "remove by frequency"} -{"id": 42048, "token": "\u0120toda", "merges": "\u0120to da", "count": 1107, "type": "remove by frequency"} -{"id": 42828, "token": "\u0120Kang", "merges": "\u0120K ang", "count": 1107, "type": "remove by frequency"} -{"id": 39194, "token": "\u0120Raz", "merges": "\u0120R az", "count": 1107, "type": "remove by frequency"} -{"id": 35718, "token": "\u00c2\u00b0,", "merges": "\u00c2\u00b0 ,", "count": 1107, "type": "remove by frequency"} -{"id": 36143, "token": "\u0120qualitatively", "merges": "\u0120qual itatively", "count": 1108, "type": "remove by frequency"} -{"id": 21741, "token": "\u0120Murphy", "merges": "\u0120Mur phy", "count": 1108, "type": "remove by frequency"} -{"id": 47315, "token": "\u0120Poetry", "merges": "\u0120Po etry", "count": 1108, "type": "remove by frequency"} -{"id": 45146, "token": "orbent", "merges": "orb ent", "count": 1108, "type": "remove by frequency"} -{"id": 45049, "token": "asmod", "merges": "as mod", "count": 1108, "type": "remove by frequency"} -{"id": 27627, "token": "\u0120aneurys", "merges": "\u0120aneur ys", "count": 1109, "type": "remove by frequency"} -{"id": 41931, "token": "\u0120Floyd", "merges": "\u0120Fl oyd", "count": 1109, "type": "remove by frequency"} -{"id": 35412, "token": "\u0120Clin", "merges": "\u0120Cl in", "count": 1109, "type": "remove by frequency"} -{"id": 32962, "token": "\u0120Dawn", "merges": "\u0120Da wn", "count": 1109, "type": "remove by frequency"} -{"id": 32739, "token": "\u0120urea", "merges": "\u0120ure a", "count": 1109, "type": "remove by frequency"} -{"id": 28214, "token": "\u0120Mountains", "merges": "\u0120Mount ains", "count": 1110, "type": "remove by frequency"} -{"id": 32500, "token": "\u0120SPSS", "merges": "\u0120S PSS", "count": 1110, "type": "remove by frequency"} -{"id": 29459, "token": "\u0120Neu", "merges": "\u0120Ne u", "count": 1110, "type": "remove by frequency"} -{"id": 36113, "token": "\u0120Dw", "merges": "\u0120D w", "count": 1110, "type": "remove by frequency"} -{"id": 39000, "token": "$}", "merges": "$ }", "count": 1110, "type": "remove by frequency"} -{"id": 47920, "token": "\u0120vibrational", "merges": "\u0120vibr ational", "count": 1111, "type": "remove by frequency"} -{"id": 31903, "token": "\u0120Manuel", "merges": "\u0120Man uel", "count": 1111, "type": "remove by frequency"} -{"id": 35018, "token": "\u0120Comics", "merges": "\u0120Com ics", "count": 1111, "type": "remove by frequency"} -{"id": 30374, "token": "\u0120abundances", "merges": "\u0120abund ances", "count": 1112, "type": "remove by frequency"} -{"id": 39383, "token": "\u0120theorems", "merges": "\u0120the orems", "count": 1112, "type": "remove by frequency"} -{"id": 20931, "token": "\u0120Benn", "merges": "\u0120B enn", "count": 1112, "type": "remove by frequency"} -{"id": 24006, "token": "\u0120Edwards", "merges": "\u0120Ed wards", "count": 1113, "type": "remove by frequency"} -{"id": 46016, "token": "\u00e3\u0123\u0139\u00e3\u0123\u0126", "merges": "\u00e3\u0123\u0139 \u00e3\u0123\u0126", "count": 1113, "type": "remove by frequency"} -{"id": 44648, "token": "\u0120v\u00c3\u00a1", "merges": "\u0120v \u00c3\u00a1", "count": 1113, "type": "remove by frequency"} -{"id": 31466, "token": "\u0120Interview", "merges": "\u0120Inter view", "count": 1114, "type": "remove by frequency"} -{"id": 28618, "token": "\u0120pentru", "merges": "\u0120pent ru", "count": 1114, "type": "remove by frequency"} -{"id": 32122, "token": "\u0120trache", "merges": "\u0120tr ache", "count": 1114, "type": "remove by frequency"} -{"id": 13986, "token": "\u0120Dutch", "merges": "\u0120D utch", "count": 1114, "type": "remove by frequency"} -{"id": 18151, "token": "poons", "merges": "po ons", "count": 1114, "type": "remove by frequency"} -{"id": 43054, "token": "\u0120leth", "merges": "\u0120le th", "count": 1114, "type": "remove by frequency"} -{"id": 12388, "token": "\u00ce\u00b9\u00ce\u00ba", "merges": "\u00ce\u00b9 \u00ce\u00ba", "count": 1114, "type": "remove by frequency"} -{"id": 35117, "token": ".[^", "merges": ".[ ^", "count": 1114, "type": "remove by frequency"} -{"id": 38428, "token": "aphylococcus", "merges": "aphyl ococcus", "count": 1115, "type": "remove by frequency"} -{"id": 50169, "token": "\u00e3\u0123\u00a3\u00e3\u0123\u00a6\u00e3\u0123\u0126\u00e3\u0124\u012d", "merges": "\u00e3\u0123\u00a3 \u00e3\u0123\u00a6\u00e3\u0123\u0126\u00e3\u0124\u012d", "count": 1115, "type": "remove by frequency"} -{"id": 17966, "token": "\u0120Warren", "merges": "\u0120War ren", "count": 1115, "type": "remove by frequency"} -{"id": 19147, "token": "\u0120abnormalities", "merges": "\u0120abnormal ities", "count": 1116, "type": "remove by frequency"} -{"id": 41398, "token": "\u0120analytically", "merges": "\u0120analyt ically", "count": 1116, "type": "remove by frequency"} -{"id": 25554, "token": "ocarcinoma", "merges": "ocarcin oma", "count": 1116, "type": "remove by frequency"} -{"id": 42632, "token": "acrylamide", "merges": "acry lamide", "count": 1116, "type": "remove by frequency"} -{"id": 43646, "token": "\u0120Guarant", "merges": "\u0120Gu arant", "count": 1116, "type": "remove by frequency"} -{"id": 24777, "token": "\u0120Hb", "merges": "\u0120H b", "count": 1116, "type": "remove by frequency"} -{"id": 27523, "token": "\u0120Reynolds", "merges": "\u0120Reyn olds", "count": 1118, "type": "remove by frequency"} -{"id": 45671, "token": "\u0120craving", "merges": "\u0120cra ving", "count": 1118, "type": "remove by frequency"} -{"id": 42154, "token": "\u0120spinach", "merges": "\u0120spin ach", "count": 1118, "type": "remove by frequency"} -{"id": 44591, "token": "\u0120Hammer", "merges": "\u0120Ham mer", "count": 1118, "type": "remove by frequency"} -{"id": 36780, "token": "\u0120PDE", "merges": "\u0120P DE", "count": 1118, "type": "remove by frequency"} -{"id": 43090, "token": "\u0120------------------------------", "merges": "\u0120---------------- --------------", "count": 1119, "type": "remove by frequency"} -{"id": 31105, "token": "\u0120impeachment", "merges": "\u0120impe achment", "count": 1119, "type": "remove by frequency"} -{"id": 48353, "token": "\u0120filmmakers", "merges": "\u0120filmm akers", "count": 1119, "type": "remove by frequency"} -{"id": 36345, "token": "\u0120Excell", "merges": "\u0120Ex cell", "count": 1119, "type": "remove by frequency"} -{"id": 40882, "token": "\u0120phonon", "merges": "\u0120phon on", "count": 1119, "type": "remove by frequency"} -{"id": 41438, "token": "\u0120Hair", "merges": "\u0120H air", "count": 1119, "type": "remove by frequency"} -{"id": 28231, "token": "}[\\", "merges": "} [\\", "count": 1119, "type": "remove by frequency"} -{"id": 47567, "token": "\u0120tutti", "merges": "\u0120tut ti", "count": 1120, "type": "remove by frequency"} -{"id": 49461, "token": "dried", "merges": "d ried", "count": 1120, "type": "remove by frequency"} -{"id": 39399, "token": "\u0120DSM", "merges": "\u0120D SM", "count": 1120, "type": "remove by frequency"} -{"id": 22957, "token": "\u0120oscillations", "merges": "\u0120oscill ations", "count": 1121, "type": "remove by frequency"} -{"id": 49148, "token": "\u0120favorably", "merges": "\u0120favor ably", "count": 1121, "type": "remove by frequency"} -{"id": 33708, "token": "\u0120incision", "merges": "\u0120inc ision", "count": 1121, "type": "remove by frequency"} -{"id": 40441, "token": "\u0120COMPANY", "merges": "\u0120COMP ANY", "count": 1121, "type": "remove by frequency"} -{"id": 47121, "token": "\u0120fiddle", "merges": "\u0120f iddle", "count": 1121, "type": "remove by frequency"} -{"id": 43769, "token": "\u0120Bacon", "merges": "\u0120B acon", "count": 1121, "type": "remove by frequency"} -{"id": 19980, "token": "\u0120Mitchell", "merges": "\u0120Mit chell", "count": 1122, "type": "remove by frequency"} -{"id": 41708, "token": "\u0120photometry", "merges": "\u0120phot ometry", "count": 1123, "type": "remove by frequency"} -{"id": 14540, "token": "\u0120Immun", "merges": "\u0120Im mun", "count": 1123, "type": "remove by frequency"} -{"id": 19468, "token": "\u0120quark", "merges": "\u0120qu ark", "count": 1123, "type": "remove by frequency"} -{"id": 25303, "token": "\u0120Burg", "merges": "\u0120B urg", "count": 1123, "type": "remove by frequency"} -{"id": 47597, "token": "\u0120Ia", "merges": "\u0120I a", "count": 1123, "type": "remove by frequency"} -{"id": 45554, "token": "\u0120precipitated", "merges": "\u0120precip itated", "count": 1124, "type": "remove by frequency"} -{"id": 47902, "token": "\u0120agitated", "merges": "\u0120ag itated", "count": 1124, "type": "remove by frequency"} -{"id": 45287, "token": "\u0120Teaching", "merges": "\u0120Te aching", "count": 1124, "type": "remove by frequency"} -{"id": 44456, "token": "otrophic", "merges": "ot rophic", "count": 1124, "type": "remove by frequency"} -{"id": 22748, "token": "\u0120NaCl", "merges": "\u0120Na Cl", "count": 1124, "type": "remove by frequency"} -{"id": 26709, "token": "\u0120Scar", "merges": "\u0120Sc ar", "count": 1124, "type": "remove by frequency"} -{"id": 17814, "token": "\u0120Everyone", "merges": "\u0120Every one", "count": 1125, "type": "remove by frequency"} -{"id": 24640, "token": "\u0120Valent", "merges": "\u0120Val ent", "count": 1125, "type": "remove by frequency"} -{"id": 37068, "token": "\u0120Cairo", "merges": "\u0120C airo", "count": 1125, "type": "remove by frequency"} -{"id": 35914, "token": "Hence", "merges": "H ence", "count": 1125, "type": "remove by frequency"} -{"id": 9890, "token": "\u0120Italian", "merges": "\u0120It alian", "count": 1126, "type": "remove by frequency"} -{"id": 48500, "token": "\u0120ankles", "merges": "\u0120ank les", "count": 1126, "type": "remove by frequency"} -{"id": 41629, "token": "\u0120ainda", "merges": "\u0120a inda", "count": 1126, "type": "remove by frequency"} -{"id": 45814, "token": "\u0120oddly", "merges": "\u0120odd ly", "count": 1126, "type": "remove by frequency"} -{"id": 14313, "token": "\u00e2\u0126\u00a2", "merges": "\u00e2\u0126 \u00a2", "count": 1126, "type": "remove by frequency"} -{"id": 30944, "token": "\u0120Meyer", "merges": "\u0120M eyer", "count": 1127, "type": "remove by frequency"} -{"id": 35457, "token": "\u00c5\u0124a", "merges": "\u00c5\u0124 a", "count": 1127, "type": "remove by frequency"} -{"id": 16891, "token": "\u0120distal", "merges": "\u0120dist al", "count": 1128, "type": "remove by frequency"} -{"id": 17413, "token": "\u0120Tokyo", "merges": "\u0120Tok yo", "count": 1128, "type": "remove by frequency"} -{"id": 43852, "token": "\u0120Eat", "merges": "\u0120E at", "count": 1128, "type": "remove by frequency"} -{"id": 40682, "token": "\u0120algun", "merges": "\u0120al gun", "count": 1129, "type": "remove by frequency"} -{"id": 16225, "token": "\u0120Victoria", "merges": "\u0120Victor ia", "count": 1130, "type": "remove by frequency"} -{"id": 32013, "token": "idepress", "merges": "ide press", "count": 1130, "type": "remove by frequency"} -{"id": 44325, "token": "\u0120Zhu", "merges": "\u0120Z hu", "count": 1130, "type": "remove by frequency"} -{"id": 46893, "token": "acycline", "merges": "acy cline", "count": 1131, "type": "remove by frequency"} -{"id": 39039, "token": "\u0120probed", "merges": "\u0120prob ed", "count": 1131, "type": "remove by frequency"} -{"id": 34239, "token": "\u0120Bod", "merges": "\u0120B od", "count": 1132, "type": "remove by frequency"} -{"id": 50318, "token": "\u00e2\u012a\u0140", "merges": "\u00e2\u012a \u0140", "count": 1132, "type": "remove by frequency"} -{"id": 48946, "token": "\u0120){", "merges": "\u0120 ){", "count": 1132, "type": "remove by frequency"} -{"id": 36959, "token": "\u0120soll", "merges": "\u0120sol l", "count": 1133, "type": "remove by frequency"} -{"id": 36533, "token": "\u0120Rex", "merges": "\u0120R ex", "count": 1133, "type": "remove by frequency"} -{"id": 13658, "token": "\u0120Specifically", "merges": "\u0120Spec ifically", "count": 1134, "type": "remove by frequency"} -{"id": 26157, "token": "\u0120Opinion", "merges": "\u0120O pinion", "count": 1134, "type": "remove by frequency"} -{"id": 27082, "token": "\u0120k\u00c3\u00a4", "merges": "\u0120k \u00c3\u00a4", "count": 1134, "type": "remove by frequency"} -{"id": 34008, "token": "\u0120Coulomb", "merges": "\u0120Coul omb", "count": 1135, "type": "remove by frequency"} -{"id": 6112, "token": "\u0120Texas", "merges": "\u0120Tex as", "count": 1135, "type": "remove by frequency"} -{"id": 46839, "token": "\u0120Mink", "merges": "\u0120M ink", "count": 1135, "type": "remove by frequency"} -{"id": 35538, "token": "\u0120PSA", "merges": "\u0120P SA", "count": 1135, "type": "remove by frequency"} -{"id": 31796, "token": "Appendix", "merges": "App endix", "count": 1136, "type": "remove by frequency"} -{"id": 48196, "token": "\u0120slut", "merges": "\u0120sl ut", "count": 1136, "type": "remove by frequency"} -{"id": 44025, "token": "\u0120Tou", "merges": "\u0120T ou", "count": 1137, "type": "remove by frequency"} -{"id": 28142, "token": "\u0120Cf", "merges": "\u0120C f", "count": 1137, "type": "remove by frequency"} -{"id": 41381, "token": "\u0120pulsed", "merges": "\u0120pul sed", "count": 1138, "type": "remove by frequency"} -{"id": 44406, "token": "plasty", "merges": "pl asty", "count": 1138, "type": "remove by frequency"} -{"id": 42008, "token": "\u0120alkal", "merges": "\u0120alk al", "count": 1138, "type": "remove by frequency"} -{"id": 45617, "token": "\u0120FOUR", "merges": "\u0120F OUR", "count": 1138, "type": "remove by frequency"} -{"id": 36017, "token": "\u00e1\u0125\u0136", "merges": "\u00e1\u0125 \u0136", "count": 1138, "type": "remove by frequency"} -{"id": 20185, "token": "\u0120asymptotic", "merges": "\u0120asympt otic", "count": 1139, "type": "remove by frequency"} -{"id": 47110, "token": "DECLARE", "merges": "DECL ARE", "count": 1139, "type": "remove by frequency"} -{"id": 43517, "token": "\u0120peered", "merges": "\u0120pe ered", "count": 1139, "type": "remove by frequency"} -{"id": 43822, "token": "\u0120umbil", "merges": "\u0120umb il", "count": 1139, "type": "remove by frequency"} -{"id": 44522, "token": "\u0120Boat", "merges": "\u0120Bo at", "count": 1139, "type": "remove by frequency"} -{"id": 16002, "token": "\u0120postoperative", "merges": "\u0120post operative", "count": 1140, "type": "remove by frequency"} -{"id": 29362, "token": "\u0120transporter", "merges": "\u0120trans porter", "count": 1140, "type": "remove by frequency"} -{"id": 41486, "token": "\u0120Publishers", "merges": "\u0120Publ ishers", "count": 1140, "type": "remove by frequency"} -{"id": 34481, "token": "\u0120steroids", "merges": "\u0120ster oids", "count": 1140, "type": "remove by frequency"} -{"id": 39632, "token": "\u0120Programs", "merges": "\u0120Pro grams", "count": 1140, "type": "remove by frequency"} -{"id": 43540, "token": "\u0120Rising", "merges": "\u0120R ising", "count": 1140, "type": "remove by frequency"} -{"id": 11442, "token": "\u0120Northern", "merges": "\u0120N orthern", "count": 1141, "type": "remove by frequency"} -{"id": 14757, "token": "\u0120induces", "merges": "\u0120indu ces", "count": 1141, "type": "remove by frequency"} -{"id": 11867, "token": "\u0120Eastern", "merges": "\u0120E astern", "count": 1141, "type": "remove by frequency"} -{"id": 18671, "token": "\u0120Marine", "merges": "\u0120Mar ine", "count": 1141, "type": "remove by frequency"} -{"id": 31540, "token": "\u0120Bach", "merges": "\u0120B ach", "count": 1141, "type": "remove by frequency"} -{"id": 17042, "token": "}$)", "merges": "}$ )", "count": 1141, "type": "remove by frequency"} -{"id": 30314, "token": "\u0120Initiative", "merges": "\u0120Init iative", "count": 1142, "type": "remove by frequency"} -{"id": 20150, "token": "\u0120\u00c3\u00bcber", "merges": "\u0120\u00c3\u00bc ber", "count": 1142, "type": "remove by frequency"} -{"id": 17549, "token": "\u0120Raj", "merges": "\u0120R aj", "count": 1142, "type": "remove by frequency"} -{"id": 26096, "token": "\u0120Kor", "merges": "\u0120K or", "count": 1142, "type": "remove by frequency"} -{"id": 20628, "token": "\u0120Samuel", "merges": "\u0120Sam uel", "count": 1144, "type": "remove by frequency"} -{"id": 27269, "token": "\u0120aneur", "merges": "\u0120an eur", "count": 1144, "type": "remove by frequency"} -{"id": 48688, "token": "\u0120Frontier", "merges": "\u0120Front ier", "count": 1145, "type": "remove by frequency"} -{"id": 47852, "token": "\u0120narciss", "merges": "\u0120narc iss", "count": 1145, "type": "remove by frequency"} -{"id": 33904, "token": "\u0120Powers", "merges": "\u0120Pow ers", "count": 1145, "type": "remove by frequency"} -{"id": 31157, "token": "\u0120hilar", "merges": "\u0120h ilar", "count": 1145, "type": "remove by frequency"} -{"id": 16067, "token": "\u0120glyc", "merges": "\u0120gly c", "count": 1145, "type": "remove by frequency"} -{"id": 28230, "token": "\u0120Roth", "merges": "\u0120R oth", "count": 1145, "type": "remove by frequency"} -{"id": 36920, "token": "\u0120Sou", "merges": "\u0120S ou", "count": 1145, "type": "remove by frequency"} -{"id": 43071, "token": "\u0120Examination", "merges": "\u0120Ex amination", "count": 1146, "type": "remove by frequency"} -{"id": 17604, "token": "\u0120ventricular", "merges": "\u0120vent ricular", "count": 1146, "type": "remove by frequency"} -{"id": 45697, "token": "\u0120anecd", "merges": "\u0120anec d", "count": 1146, "type": "remove by frequency"} -{"id": 40808, "token": "\u00e0\u00a4\u00b5", "merges": "\u00e0\u00a4 \u00b5", "count": 1146, "type": "remove by frequency"} -{"id": 45167, "token": "\u0120recollection", "merges": "\u0120recol lection", "count": 1147, "type": "remove by frequency"} -{"id": 47348, "token": "\u0120scratching", "merges": "\u0120scr atching", "count": 1147, "type": "remove by frequency"} -{"id": 42027, "token": "\u0120Flying", "merges": "\u0120F lying", "count": 1147, "type": "remove by frequency"} -{"id": 28409, "token": "\u0120ounces", "merges": "\u0120oun ces", "count": 1147, "type": "remove by frequency"} -{"id": 19381, "token": "\u0120Marx", "merges": "\u0120Mar x", "count": 1147, "type": "remove by frequency"} -{"id": 42966, "token": "\u0120rien", "merges": "\u0120ri en", "count": 1147, "type": "remove by frequency"} -{"id": 36783, "token": "\u0120fois", "merges": "\u0120fo is", "count": 1147, "type": "remove by frequency"} -{"id": 9804, "token": "\u0120\u00c2\u00b5", "merges": "\u0120\u00c2 \u00b5", "count": 1147, "type": "remove by frequency"} -{"id": 46704, "token": "\u0120precludes", "merges": "\u0120pre cludes", "count": 1148, "type": "remove by frequency"} -{"id": 50069, "token": "Previously", "merges": "Pre viously", "count": 1148, "type": "remove by frequency"} -{"id": 26648, "token": "NSString", "merges": "NS String", "count": 1148, "type": "remove by frequency"} -{"id": 18258, "token": "\u0120tumour", "merges": "\u0120tum our", "count": 1148, "type": "remove by frequency"} -{"id": 23916, "token": "\u0120venous", "merges": "\u0120ven ous", "count": 1148, "type": "remove by frequency"} -{"id": 22012, "token": "\u0120DAM", "merges": "\u0120D AM", "count": 1148, "type": "remove by frequency"} -{"id": 44364, "token": "addEventListener", "merges": "add EventListener", "count": 1149, "type": "remove by frequency"} -{"id": 50119, "token": "\u0120bijection", "merges": "\u0120bi jection", "count": 1149, "type": "remove by frequency"} -{"id": 39916, "token": "\u0120peppers", "merges": "\u0120pe ppers", "count": 1149, "type": "remove by frequency"} -{"id": 27942, "token": "cemia", "merges": "ce mia", "count": 1149, "type": "remove by frequency"} -{"id": 46759, "token": "\u0120Cars", "merges": "\u0120C ars", "count": 1149, "type": "remove by frequency"} -{"id": 36481, "token": "\u0120MLB", "merges": "\u0120ML B", "count": 1149, "type": "remove by frequency"} -{"id": 34770, "token": "\u0120electrochemical", "merges": "\u0120electro chemical", "count": 1150, "type": "remove by frequency"} -{"id": 48101, "token": "\u0120retarded", "merges": "\u0120ret arded", "count": 1150, "type": "remove by frequency"} -{"id": 27870, "token": "\u0120si\u00c4\u013b", "merges": "\u0120si \u00c4\u013b", "count": 1150, "type": "remove by frequency"} -{"id": 32025, "token": "REEK", "merges": "REE K", "count": 1150, "type": "remove by frequency"} -{"id": 46978, "token": "\u00d8\u00a7\u00d8\u00b3", "merges": "\u00d8\u00a7\u00d8 \u00b3", "count": 1150, "type": "remove by frequency"} -{"id": 49529, "token": "\u0120Dre", "merges": "\u0120D re", "count": 1150, "type": "remove by frequency"} -{"id": 11915, "token": "\u0120Uk", "merges": "\u0120U k", "count": 1150, "type": "remove by frequency"} -{"id": 37903, "token": "\u0120scaffolds", "merges": "\u0120scaff olds", "count": 1151, "type": "remove by frequency"} -{"id": 29498, "token": "\u0120predis", "merges": "\u0120pred is", "count": 1151, "type": "remove by frequency"} -{"id": 10889, "token": "ellee", "merges": "elle e", "count": 1151, "type": "remove by frequency"} -{"id": 24518, "token": "\u0120Reed", "merges": "\u0120Re ed", "count": 1151, "type": "remove by frequency"} -{"id": 29945, "token": "enchymal", "merges": "ench ymal", "count": 1152, "type": "remove by frequency"} -{"id": 24887, "token": "\u0120quel", "merges": "\u0120qu el", "count": 1152, "type": "remove by frequency"} -{"id": 25686, "token": "\u0120Mans", "merges": "\u0120M ans", "count": 1152, "type": "remove by frequency"} -{"id": 26831, "token": "\u0120glycer", "merges": "\u0120gly cer", "count": 1153, "type": "remove by frequency"} -{"id": 33985, "token": "\u0120donc", "merges": "\u0120don c", "count": 1153, "type": "remove by frequency"} -{"id": 35540, "token": "\u0120Dust", "merges": "\u0120D ust", "count": 1153, "type": "remove by frequency"} -{"id": 22901, "token": "\u0120Guardian", "merges": "\u0120Guard ian", "count": 1154, "type": "remove by frequency"} -{"id": 46191, "token": "\u0120Warrior", "merges": "\u0120War rior", "count": 1154, "type": "remove by frequency"} -{"id": 4862, "token": "overline", "merges": "over line", "count": 1154, "type": "remove by frequency"} -{"id": 49881, "token": "\u0120Fritz", "merges": "\u0120F ritz", "count": 1154, "type": "remove by frequency"} -{"id": 40242, "token": "\u0120Equations", "merges": "\u0120Equ ations", "count": 1155, "type": "remove by frequency"} -{"id": 43284, "token": "\u0120avoir", "merges": "\u0120av oir", "count": 1155, "type": "remove by frequency"} -{"id": 21975, "token": "\u0120Alb", "merges": "\u0120Al b", "count": 1155, "type": "remove by frequency"} -{"id": 38004, "token": "\u0120Dum", "merges": "\u0120D um", "count": 1155, "type": "remove by frequency"} -{"id": 36878, "token": "\u0120quantitatively", "merges": "\u0120quant itatively", "count": 1156, "type": "remove by frequency"} -{"id": 32605, "token": "\u0120bifur", "merges": "\u0120bif ur", "count": 1156, "type": "remove by frequency"} -{"id": 35603, "token": "\u00c3\u00bcller", "merges": "\u00c3\u00bc ller", "count": 1156, "type": "remove by frequency"} -{"id": 41698, "token": "ibilit", "merges": "ib ilit", "count": 1156, "type": "remove by frequency"} -{"id": 22633, "token": "\u0120Cla", "merges": "\u0120Cl a", "count": 1156, "type": "remove by frequency"} -{"id": 31683, "token": "\u0120Oz", "merges": "\u0120O z", "count": 1156, "type": "remove by frequency"} -{"id": 38427, "token": "\u0120responders", "merges": "\u0120respond ers", "count": 1157, "type": "remove by frequency"} -{"id": 35754, "token": "unreadable", "merges": "un readable", "count": 1157, "type": "remove by frequency"} -{"id": 41453, "token": "\u0120battling", "merges": "\u0120batt ling", "count": 1157, "type": "remove by frequency"} -{"id": 40732, "token": "\u0120trout", "merges": "\u0120tr out", "count": 1157, "type": "remove by frequency"} -{"id": 32977, "token": "\u0120Hav", "merges": "\u0120H av", "count": 1157, "type": "remove by frequency"} -{"id": 32198, "token": "\u0120Ancient", "merges": "\u0120An cient", "count": 1158, "type": "remove by frequency"} -{"id": 48400, "token": "\u0120Primer", "merges": "\u0120Pr imer", "count": 1158, "type": "remove by frequency"} -{"id": 38920, "token": "\u0120mildly", "merges": "\u0120mild ly", "count": 1158, "type": "remove by frequency"} -{"id": 50151, "token": "\u0120spirituality", "merges": "\u0120spirit uality", "count": 1159, "type": "remove by frequency"} -{"id": 26414, "token": "\u0120converges", "merges": "\u0120conver ges", "count": 1159, "type": "remove by frequency"} -{"id": 29034, "token": "\u0120playoff", "merges": "\u0120play off", "count": 1159, "type": "remove by frequency"} -{"id": 45582, "token": "\u0120disbelief", "merges": "\u0120dis belief", "count": 1160, "type": "remove by frequency"} -{"id": 46721, "token": "\u0120drilled", "merges": "\u0120dr illed", "count": 1160, "type": "remove by frequency"} -{"id": 44963, "token": "\u0120acetic", "merges": "\u0120ac etic", "count": 1160, "type": "remove by frequency"} -{"id": 46636, "token": "\u0120calor", "merges": "\u0120cal or", "count": 1160, "type": "remove by frequency"} -{"id": 40499, "token": "\u0120Activities", "merges": "\u0120Act ivities", "count": 1161, "type": "remove by frequency"} -{"id": 34370, "token": "\u0120Mari", "merges": "\u0120M ari", "count": 1161, "type": "remove by frequency"} -{"id": 39495, "token": "ieties", "merges": "iet ies", "count": 1162, "type": "remove by frequency"} -{"id": 44900, "token": "\u0120Ib", "merges": "\u0120I b", "count": 1162, "type": "remove by frequency"} -{"id": 29802, "token": "\u0120\u00d9\u0129", "merges": "\u0120 \u00d9\u0129", "count": 1162, "type": "remove by frequency"} -{"id": 10681, "token": "\u00e0\u00b0", "merges": "\u00e0 \u00b0", "count": 1162, "type": "remove by frequency"} -{"id": 21903, "token": "\u0120phenotypes", "merges": "\u0120phen otypes", "count": 1163, "type": "remove by frequency"} -{"id": 30706, "token": "\u0120Enjoy", "merges": "\u0120En joy", "count": 1163, "type": "remove by frequency"} -{"id": 49816, "token": "\u0120adjunct", "merges": "\u0120adj unct", "count": 1164, "type": "remove by frequency"} -{"id": 44927, "token": "\u0120anhydr", "merges": "\u0120an hydr", "count": 1164, "type": "remove by frequency"} -{"id": 16019, "token": "\u0120Phill", "merges": "\u0120Ph ill", "count": 1164, "type": "remove by frequency"} -{"id": 26135, "token": "\u0120Cort", "merges": "\u0120C ort", "count": 1164, "type": "remove by frequency"} -{"id": 46769, "token": "\u0120Keeping", "merges": "\u0120Ke eping", "count": 1165, "type": "remove by frequency"} -{"id": 39582, "token": "\u0120Drum", "merges": "\u0120Dr um", "count": 1165, "type": "remove by frequency"} -{"id": 18431, "token": "Several", "merges": "S everal", "count": 1166, "type": "remove by frequency"} -{"id": 50121, "token": "\u0120ici", "merges": "\u0120 ici", "count": 1166, "type": "remove by frequency"} -{"id": 44413, "token": "\u0120gripping", "merges": "\u0120gri pping", "count": 1167, "type": "remove by frequency"} -{"id": 24964, "token": "\u0120lymphoma", "merges": "\u0120lymph oma", "count": 1167, "type": "remove by frequency"} -{"id": 43483, "token": "\u0120Thinking", "merges": "\u0120Th inking", "count": 1167, "type": "remove by frequency"} -{"id": 21788, "token": "\u0120Highway", "merges": "\u0120High way", "count": 1167, "type": "remove by frequency"} -{"id": 31928, "token": "\u0120a\u00c3\u00b1os", "merges": "\u0120a \u00c3\u00b1os", "count": 1167, "type": "remove by frequency"} -{"id": 34876, "token": "\u0120Sue", "merges": "\u0120S ue", "count": 1167, "type": "remove by frequency"} -{"id": 34754, "token": "\u0120approximations", "merges": "\u0120approxim ations", "count": 1168, "type": "remove by frequency"} -{"id": 43655, "token": "\u0120Principles", "merges": "\u0120Princ iples", "count": 1168, "type": "remove by frequency"} -{"id": 48414, "token": "\u0120comport", "merges": "\u0120comp ort", "count": 1168, "type": "remove by frequency"} -{"id": 30921, "token": "\u0120Photos", "merges": "\u0120Phot os", "count": 1168, "type": "remove by frequency"} -{"id": 18770, "token": "\u0120Much", "merges": "\u0120M uch", "count": 1168, "type": "remove by frequency"} -{"id": 40973, "token": "\u010a\u0109\u0109\u0120", "merges": "\u010a\u0109\u0109 \u0120", "count": 1168, "type": "remove by frequency"} -{"id": 40603, "token": "\u0120haplotype", "merges": "\u0120hapl otype", "count": 1169, "type": "remove by frequency"} -{"id": 43041, "token": "\u0120AFL", "merges": "\u0120A FL", "count": 1169, "type": "remove by frequency"} -{"id": 46294, "token": "'=>", "merges": "' =>", "count": 1169, "type": "remove by frequency"} -{"id": 34902, "token": "\u0120symmetries", "merges": "\u0120symmet ries", "count": 1170, "type": "remove by frequency"} -{"id": 38167, "token": "\u0120parasitic", "merges": "\u0120paras itic", "count": 1170, "type": "remove by frequency"} -{"id": 25849, "token": "\u0120\u00d8\u00b9", "merges": "\u0120\u00d8 \u00b9", "count": 1170, "type": "remove by frequency"} -{"id": 48293, "token": "\u0120straightened", "merges": "\u0120straight ened", "count": 1171, "type": "remove by frequency"} -{"id": 39276, "token": "\u0120suicidal", "merges": "\u0120su icidal", "count": 1171, "type": "remove by frequency"} -{"id": 50245, "token": "\u0120toddler", "merges": "\u0120todd ler", "count": 1171, "type": "remove by frequency"} -{"id": 22547, "token": "\u0120Bernard", "merges": "\u0120Bern ard", "count": 1171, "type": "remove by frequency"} -{"id": 32703, "token": "\u0120-------------------------", "merges": "\u0120---------------- ---------", "count": 1172, "type": "remove by frequency"} -{"id": 43546, "token": "\u0120elasticity", "merges": "\u0120elastic ity", "count": 1172, "type": "remove by frequency"} -{"id": 48364, "token": "\u0120accl", "merges": "\u0120ac cl", "count": 1172, "type": "remove by frequency"} -{"id": 46414, "token": "\u0120SSR", "merges": "\u0120S SR", "count": 1172, "type": "remove by frequency"} -{"id": 17528, "token": "\u0120Affairs", "merges": "\u0120Aff airs", "count": 1173, "type": "remove by frequency"} -{"id": 47251, "token": "\u0120(+)", "merges": "\u0120( +)", "count": 1173, "type": "remove by frequency"} -{"id": 11630, "token": "\u00cf\u012f", "merges": "\u00cf \u012f", "count": 1173, "type": "remove by frequency"} -{"id": 40503, "token": "\u0120biodiversity", "merges": "\u0120biod iversity", "count": 1174, "type": "remove by frequency"} -{"id": 50012, "token": "\u0120Rings", "merges": "\u0120R ings", "count": 1174, "type": "remove by frequency"} -{"id": 34271, "token": "\u0120\u00e1\u0125", "merges": "\u0120 \u00e1\u0125", "count": 1174, "type": "remove by frequency"} -{"id": 12362, "token": "\u0120Tenn", "merges": "\u0120T enn", "count": 1175, "type": "remove by frequency"} -{"id": 26789, "token": "\u0120Nine", "merges": "\u0120N ine", "count": 1175, "type": "remove by frequency"} -{"id": 46931, "token": "))^", "merges": ")) ^", "count": 1175, "type": "remove by frequency"} -{"id": 38183, "token": "\u0120combinatorial", "merges": "\u0120combinator ial", "count": 1176, "type": "remove by frequency"} -{"id": 27939, "token": "\u0120degeneration", "merges": "\u0120de generation", "count": 1176, "type": "remove by frequency"} -{"id": 8341, "token": "\u0120plaintiffs", "merges": "\u0120plaintiff s", "count": 1176, "type": "remove by frequency"} -{"id": 35673, "token": "textwidth", "merges": "text width", "count": 1176, "type": "remove by frequency"} -{"id": 41221, "token": "\u0120Edited", "merges": "\u0120Ed ited", "count": 1176, "type": "remove by frequency"} -{"id": 24126, "token": "\u0120Curt", "merges": "\u0120C urt", "count": 1176, "type": "remove by frequency"} -{"id": 22718, "token": "\u0120Mathematics", "merges": "\u0120Mathemat ics", "count": 1177, "type": "remove by frequency"} -{"id": 47779, "token": "\u0120fractured", "merges": "\u0120fract ured", "count": 1177, "type": "remove by frequency"} -{"id": 45015, "token": "\u0120wrists", "merges": "\u0120wr ists", "count": 1177, "type": "remove by frequency"} -{"id": 50041, "token": "\u0120diodes", "merges": "\u0120di odes", "count": 1178, "type": "remove by frequency"} -{"id": 40713, "token": "\u0120Didn", "merges": "\u0120Did n", "count": 1178, "type": "remove by frequency"} -{"id": 43416, "token": "\u0120commut", "merges": "\u0120comm ut", "count": 1179, "type": "remove by frequency"} -{"id": 45052, "token": "\u0120invas", "merges": "\u0120inv as", "count": 1179, "type": "remove by frequency"} -{"id": 44348, "token": "ipore", "merges": "ip ore", "count": 1179, "type": "remove by frequency"} -{"id": 29182, "token": "\u0120cryptocurrency", "merges": "\u0120cryptocur rency", "count": 1180, "type": "remove by frequency"} -{"id": 18704, "token": "\u0120saline", "merges": "\u0120sal ine", "count": 1180, "type": "remove by frequency"} -{"id": 30533, "token": "\u0120Ow", "merges": "\u0120O w", "count": 1180, "type": "remove by frequency"} -{"id": 23061, "token": "\u0120Samsung", "merges": "\u0120Sams ung", "count": 1181, "type": "remove by frequency"} -{"id": 13534, "token": "\u0120Bible", "merges": "\u0120B ible", "count": 1181, "type": "remove by frequency"} -{"id": 38973, "token": "\u0120Jung", "merges": "\u0120J ung", "count": 1181, "type": "remove by frequency"} -{"id": 21231, "token": "\u0120Ple", "merges": "\u0120P le", "count": 1181, "type": "remove by frequency"} -{"id": 37723, "token": "\u0120Dil", "merges": "\u0120D il", "count": 1182, "type": "remove by frequency"} -{"id": 50736, "token": "\u00e5\u0128\u00be", "merges": "\u00e5\u0128 \u00be", "count": 1182, "type": "remove by frequency"} -{"id": 48019, "token": "\u0120stato", "merges": "\u0120stat o", "count": 1183, "type": "remove by frequency"} -{"id": 15713, "token": "\u0120Hind", "merges": "\u0120H ind", "count": 1183, "type": "remove by frequency"} -{"id": 16543, "token": "\u0120Anna", "merges": "\u0120An na", "count": 1183, "type": "remove by frequency"} -{"id": 31156, "token": "\u0120longtime", "merges": "\u0120long time", "count": 1184, "type": "remove by frequency"} -{"id": 33721, "token": "iotensin", "merges": "iot ensin", "count": 1184, "type": "remove by frequency"} -{"id": 17515, "token": "\u0120aqueous", "merges": "\u0120a queous", "count": 1184, "type": "remove by frequency"} -{"id": 26456, "token": "\u0120Vincent", "merges": "\u0120Vin cent", "count": 1184, "type": "remove by frequency"} -{"id": 10807, "token": "\u0120BOOST", "merges": "\u0120B OOST", "count": 1184, "type": "remove by frequency"} -{"id": 40455, "token": "\u00e0\u00b9\u0123", "merges": "\u00e0\u00b9 \u0123", "count": 1184, "type": "remove by frequency"} -{"id": 49490, "token": "DCs", "merges": "D Cs", "count": 1184, "type": "remove by frequency"} -{"id": 31198, "token": "\u0120WordPress", "merges": "\u0120Word Press", "count": 1185, "type": "remove by frequency"} -{"id": 20300, "token": "Rearrange", "merges": "Rear range", "count": 1186, "type": "remove by frequency"} -{"id": 38449, "token": "\u0120blinked", "merges": "\u0120blink ed", "count": 1186, "type": "remove by frequency"} -{"id": 10596, "token": "\u0120apoptosis", "merges": "\u0120apopt osis", "count": 1187, "type": "remove by frequency"} -{"id": 39548, "token": "acetic", "merges": "ac etic", "count": 1187, "type": "remove by frequency"} -{"id": 27824, "token": "\u0120l\u00c3\u0142", "merges": "\u0120l \u00c3\u0142", "count": 1187, "type": "remove by frequency"} -{"id": 39580, "token": "\u0120QB", "merges": "\u0120Q B", "count": 1187, "type": "remove by frequency"} -{"id": 49985, "token": "\u0120semiconduct", "merges": "\u0120semicon duct", "count": 1188, "type": "remove by frequency"} -{"id": 44148, "token": "\u0120caregiver", "merges": "\u0120careg iver", "count": 1189, "type": "remove by frequency"} -{"id": 42054, "token": "\u0120Jensen", "merges": "\u0120J ensen", "count": 1189, "type": "remove by frequency"} -{"id": 49207, "token": "\u0120acquittal", "merges": "\u0120acqu ittal", "count": 1190, "type": "remove by frequency"} -{"id": 16184, "token": "\u0120lact", "merges": "\u0120l act", "count": 1190, "type": "remove by frequency"} -{"id": 48199, "token": "\u0120prosthetic", "merges": "\u0120prost hetic", "count": 1191, "type": "remove by frequency"} -{"id": 43117, "token": "\u0120resembled", "merges": "\u0120resemb led", "count": 1191, "type": "remove by frequency"} -{"id": 46870, "token": "\u0120stitches", "merges": "\u0120st itches", "count": 1191, "type": "remove by frequency"} -{"id": 47259, "token": "STITUTE", "merges": "STIT UTE", "count": 1191, "type": "remove by frequency"} -{"id": 24161, "token": "\u0120Against", "merges": "\u0120Again st", "count": 1192, "type": "remove by frequency"} -{"id": 32534, "token": "\u0120antif", "merges": "\u0120ant if", "count": 1192, "type": "remove by frequency"} -{"id": 32758, "token": "rectomy", "merges": "rect omy", "count": 1193, "type": "remove by frequency"} -{"id": 33103, "token": "\u0120translational", "merges": "\u0120transl ational", "count": 1194, "type": "remove by frequency"} -{"id": 47431, "token": "\u0120cataract", "merges": "\u0120catar act", "count": 1194, "type": "remove by frequency"} -{"id": 26818, "token": "iviral", "merges": "iv iral", "count": 1194, "type": "remove by frequency"} -{"id": 27971, "token": "veolar", "merges": "ve olar", "count": 1194, "type": "remove by frequency"} -{"id": 21243, "token": "\u00c3\u00a4h", "merges": "\u00c3\u00a4 h", "count": 1194, "type": "remove by frequency"} -{"id": 39936, "token": "\u00e1\u0141", "merges": "\u00e1 \u0141", "count": 1194, "type": "remove by frequency"} -{"id": 23607, "token": "\u0120Official", "merges": "\u0120Offic ial", "count": 1195, "type": "remove by frequency"} -{"id": 21425, "token": "\u0120inhibits", "merges": "\u0120inhib its", "count": 1196, "type": "remove by frequency"} -{"id": 48657, "token": "\u0120mundane", "merges": "\u0120mund ane", "count": 1196, "type": "remove by frequency"} -{"id": 35901, "token": "\u0120scav", "merges": "\u0120sc av", "count": 1196, "type": "remove by frequency"} -{"id": 23803, "token": "\u0120intravenous", "merges": "\u0120intraven ous", "count": 1197, "type": "remove by frequency"} -{"id": 39861, "token": "\u0120anaerobic", "merges": "\u0120ana erobic", "count": 1199, "type": "remove by frequency"} -{"id": 24618, "token": "\u0120Falls", "merges": "\u0120F alls", "count": 1199, "type": "remove by frequency"} -{"id": 29663, "token": "\u0120Lad", "merges": "\u0120L ad", "count": 1199, "type": "remove by frequency"} -{"id": 13899, "token": "sime", "merges": "s ime", "count": 1199, "type": "remove by frequency"} -{"id": 48220, "token": "\u0120Cannon", "merges": "\u0120C annon", "count": 1200, "type": "remove by frequency"} -{"id": 35230, "token": "\u0120Gang", "merges": "\u0120G ang", "count": 1200, "type": "remove by frequency"} -{"id": 16163, "token": "\u00c8\u013b", "merges": "\u00c8 \u013b", "count": 1200, "type": "remove by frequency"} -{"id": 18623, "token": "\u0120Bron", "merges": "\u0120B ron", "count": 1201, "type": "remove by frequency"} -{"id": 43358, "token": "\u0120LOT", "merges": "\u0120L OT", "count": 1201, "type": "remove by frequency"} -{"id": 38649, "token": "\u0120Aid", "merges": "\u0120A id", "count": 1201, "type": "remove by frequency"} -{"id": 21657, "token": "\u0120experimentally", "merges": "\u0120experiment ally", "count": 1202, "type": "remove by frequency"} -{"id": 48605, "token": "\u0120playwright", "merges": "\u0120play wright", "count": 1202, "type": "remove by frequency"} -{"id": 12808, "token": "\u0120Kent", "merges": "\u0120K ent", "count": 1202, "type": "remove by frequency"} -{"id": 33377, "token": "\u0120Sach", "merges": "\u0120S ach", "count": 1202, "type": "remove by frequency"} -{"id": 30537, "token": "](\\", "merges": "]( \\", "count": 1202, "type": "remove by frequency"} -{"id": 41598, "token": "\u0120Mint", "merges": "\u0120M int", "count": 1203, "type": "remove by frequency"} -{"id": 30303, "token": "\u00cf\u012a", "merges": "\u00cf \u012a", "count": 1203, "type": "remove by frequency"} -{"id": 20933, "token": "\u00ce\u00be", "merges": "\u00ce \u00be", "count": 1203, "type": "remove by frequency"} -{"id": 24043, "token": "\u0120infarction", "merges": "\u0120infar ction", "count": 1204, "type": "remove by frequency"} -{"id": 28685, "token": "\u0120ventric", "merges": "\u0120vent ric", "count": 1204, "type": "remove by frequency"} -{"id": 27634, "token": "\u0120prophyl", "merges": "\u0120pro phyl", "count": 1205, "type": "remove by frequency"} -{"id": 49613, "token": "\u0120spying", "merges": "\u0120sp ying", "count": 1205, "type": "remove by frequency"} -{"id": 32188, "token": "\u0120cirrh", "merges": "\u0120cir rh", "count": 1205, "type": "remove by frequency"} -{"id": 47290, "token": "r\u00c3\u00b3", "merges": "r \u00c3\u00b3", "count": 1205, "type": "remove by frequency"} -{"id": 38674, "token": "\u0120cavities", "merges": "\u0120cav ities", "count": 1206, "type": "remove by frequency"} -{"id": 33334, "token": "\u0120Carn", "merges": "\u0120C arn", "count": 1206, "type": "remove by frequency"} -{"id": 46161, "token": "iamo", "merges": "i amo", "count": 1206, "type": "remove by frequency"} -{"id": 49118, "token": "\u0120hens", "merges": "\u0120he ns", "count": 1207, "type": "remove by frequency"} -{"id": 17366, "token": "\u00c3\u00a4l", "merges": "\u00c3\u00a4 l", "count": 1207, "type": "remove by frequency"} -{"id": 41691, "token": "\u0120extremity", "merges": "\u0120extrem ity", "count": 1208, "type": "remove by frequency"} -{"id": 44891, "token": "\u0120Whats", "merges": "\u0120What s", "count": 1208, "type": "remove by frequency"} -{"id": 28309, "token": "\u0120muy", "merges": "\u0120m uy", "count": 1208, "type": "remove by frequency"} -{"id": 6899, "token": "\u0120Theorem", "merges": "\u0120The orem", "count": 1209, "type": "remove by frequency"} -{"id": 42233, "token": "\u0120malice", "merges": "\u0120mal ice", "count": 1209, "type": "remove by frequency"} -{"id": 40803, "token": "\u0120wurde", "merges": "\u0120wur de", "count": 1209, "type": "remove by frequency"} -{"id": 33007, "token": "\u0120Bes", "merges": "\u0120B es", "count": 1209, "type": "remove by frequency"} -{"id": 48272, "token": "\u0120decidedly", "merges": "\u0120decided ly", "count": 1210, "type": "remove by frequency"} -{"id": 8604, "token": "documentclass", "merges": "document class", "count": 1211, "type": "remove by frequency"} -{"id": 49190, "token": "\u0120testifying", "merges": "\u0120test ifying", "count": 1211, "type": "remove by frequency"} -{"id": 46947, "token": "\u0120fain", "merges": "\u0120f ain", "count": 1211, "type": "remove by frequency"} -{"id": 25393, "token": "\u0120\u00d9\u0123", "merges": "\u0120 \u00d9\u0123", "count": 1211, "type": "remove by frequency"} -{"id": 28382, "token": "\u00c3\u00a1t", "merges": "\u00c3\u00a1 t", "count": 1211, "type": "remove by frequency"} -{"id": 28808, "token": "\u0120resonant", "merges": "\u0120reson ant", "count": 1212, "type": "remove by frequency"} -{"id": 23316, "token": "\u0120Carlos", "merges": "\u0120Carl os", "count": 1212, "type": "remove by frequency"} -{"id": 23844, "token": "\u0120larvae", "merges": "\u0120lar vae", "count": 1212, "type": "remove by frequency"} -{"id": 31726, "token": "\u0120Cord", "merges": "\u0120C ord", "count": 1212, "type": "remove by frequency"} -{"id": 50100, "token": "\u0120regenerative", "merges": "\u0120regener ative", "count": 1213, "type": "remove by frequency"} -{"id": 10298, "token": "\u00ce\u00b1\u00ce\u00b9", "merges": "\u00ce\u00b1 \u00ce\u00b9", "count": 1213, "type": "remove by frequency"} -{"id": 31252, "token": "ylic", "merges": "y lic", "count": 1213, "type": "remove by frequency"} -{"id": 9952, "token": "\\^", "merges": "\\ ^", "count": 1213, "type": "remove by frequency"} -{"id": 42958, "token": "\u0120Handbook", "merges": "\u0120Hand book", "count": 1214, "type": "remove by frequency"} -{"id": 23299, "token": "\u0120leukemia", "merges": "\u0120leuk emia", "count": 1214, "type": "remove by frequency"} -{"id": 31926, "token": "\u0120footballer", "merges": "\u0120football er", "count": 1215, "type": "remove by frequency"} -{"id": 27613, "token": "\u0120playoffs", "merges": "\u0120play offs", "count": 1215, "type": "remove by frequency"} -{"id": 42094, "token": "\u0120cyclists", "merges": "\u0120cycl ists", "count": 1216, "type": "remove by frequency"} -{"id": 25689, "token": "\u0120Arnold", "merges": "\u0120Arn old", "count": 1216, "type": "remove by frequency"} -{"id": 31442, "token": "\u0120anemia", "merges": "\u0120an emia", "count": 1216, "type": "remove by frequency"} -{"id": 37683, "token": "idazole", "merges": "id azole", "count": 1216, "type": "remove by frequency"} -{"id": 45106, "token": "\u0120Vinc", "merges": "\u0120V inc", "count": 1216, "type": "remove by frequency"} -{"id": 28324, "token": "\u0120conformation", "merges": "\u0120con formation", "count": 1217, "type": "remove by frequency"} -{"id": 49374, "token": "\u0120unexplained", "merges": "\u0120unexpl ained", "count": 1217, "type": "remove by frequency"} -{"id": 45495, "token": "\u0120_________________________________", "merges": "\u0120_ ________________________________", "count": 1218, "type": "remove by frequency"} -{"id": 33626, "token": "WriteLine", "merges": "Write Line", "count": 1218, "type": "remove by frequency"} -{"id": 35516, "token": "\u0120encephal", "merges": "\u0120en cephal", "count": 1218, "type": "remove by frequency"} -{"id": 27353, "token": "\u0120Hopkins", "merges": "\u0120Hop kins", "count": 1218, "type": "remove by frequency"} -{"id": 44603, "token": "\u0120flakes", "merges": "\u0120fl akes", "count": 1218, "type": "remove by frequency"} -{"id": 37879, "token": "\u0120ITS", "merges": "\u0120I TS", "count": 1218, "type": "remove by frequency"} -{"id": 34233, "token": "\u0120doping", "merges": "\u0120dop ing", "count": 1219, "type": "remove by frequency"} -{"id": 45816, "token": "\u0120Amin", "merges": "\u0120A min", "count": 1219, "type": "remove by frequency"} -{"id": 31353, "token": "ECs", "merges": "EC s", "count": 1219, "type": "remove by frequency"} -{"id": 45190, "token": "\u0120empirically", "merges": "\u0120empir ically", "count": 1220, "type": "remove by frequency"} -{"id": 50159, "token": "\u0120roadway", "merges": "\u0120road way", "count": 1220, "type": "remove by frequency"} -{"id": 47392, "token": "\u0120Luna", "merges": "\u0120L una", "count": 1220, "type": "remove by frequency"} -{"id": 11356, "token": "\u0120NJ", "merges": "\u0120N J", "count": 1220, "type": "remove by frequency"} -{"id": 44564, "token": "})-", "merges": "}) -", "count": 1220, "type": "remove by frequency"} -{"id": 43188, "token": "\u0120Estado", "merges": "\u0120Est ado", "count": 1221, "type": "remove by frequency"} -{"id": 40389, "token": "\u0120econ", "merges": "\u0120e con", "count": 1221, "type": "remove by frequency"} -{"id": 23773, "token": "\u0120Rice", "merges": "\u0120R ice", "count": 1221, "type": "remove by frequency"} -{"id": 47860, "token": "\u0120insightful", "merges": "\u0120insight ful", "count": 1222, "type": "remove by frequency"} -{"id": 24609, "token": "\u0120Commander", "merges": "\u0120Comm ander", "count": 1222, "type": "remove by frequency"} -{"id": 36190, "token": "\u0120Cardinal", "merges": "\u0120Card inal", "count": 1222, "type": "remove by frequency"} -{"id": 16358, "token": "\u0120Anthony", "merges": "\u0120Anth ony", "count": 1222, "type": "remove by frequency"} -{"id": 50216, "token": "\u0120glimps", "merges": "\u0120gl imps", "count": 1222, "type": "remove by frequency"} -{"id": 22471, "token": "\u0120glycol", "merges": "\u0120gly col", "count": 1222, "type": "remove by frequency"} -{"id": 36029, "token": "\u0120Hein", "merges": "\u0120He in", "count": 1222, "type": "remove by frequency"} -{"id": 47751, "token": "\u0120interstellar", "merges": "\u0120inter stellar", "count": 1223, "type": "remove by frequency"} -{"id": 36145, "token": "\u0120Previously", "merges": "\u0120Pre viously", "count": 1223, "type": "remove by frequency"} -{"id": 46812, "token": "ItemGroup", "merges": "Item Group", "count": 1223, "type": "remove by frequency"} -{"id": 49186, "token": "oriously", "merges": "or iously", "count": 1223, "type": "remove by frequency"} -{"id": 45734, "token": "\u0120Meaning", "merges": "\u0120Mean ing", "count": 1223, "type": "remove by frequency"} -{"id": 41482, "token": "\u0120shakes", "merges": "\u0120sh akes", "count": 1223, "type": "remove by frequency"} -{"id": 39959, "token": "\u0120Buzz", "merges": "\u0120Bu zz", "count": 1223, "type": "remove by frequency"} -{"id": 50368, "token": "\u00e2\u0138\u00ba", "merges": "\u00e2\u0138 \u00ba", "count": 1223, "type": "remove by frequency"} -{"id": 21826, "token": "\u0120LICENSE", "merges": "\u0120L ICENSE", "count": 1224, "type": "remove by frequency"} -{"id": 28029, "token": "\u0120Ellis", "merges": "\u0120Ell is", "count": 1224, "type": "remove by frequency"} -{"id": 18827, "token": "\u0120Jess", "merges": "\u0120J ess", "count": 1224, "type": "remove by frequency"} -{"id": 19403, "token": "\u0120retrospective", "merges": "\u0120retros pective", "count": 1225, "type": "remove by frequency"} -{"id": 31235, "token": "\u0120Pit", "merges": "\u0120P it", "count": 1225, "type": "remove by frequency"} -{"id": 46172, "token": "\u0120surreal", "merges": "\u0120sur real", "count": 1226, "type": "remove by frequency"} -{"id": 41806, "token": "\u0120autre", "merges": "\u0120aut re", "count": 1226, "type": "remove by frequency"} -{"id": 46663, "token": "\u00e1\u00bb\u0123", "merges": "\u00e1\u00bb \u0123", "count": 1226, "type": "remove by frequency"} -{"id": 28060, "token": "\u0120Emergency", "merges": "\u0120Emer gency", "count": 1227, "type": "remove by frequency"} -{"id": 20718, "token": "\u0120Memorial", "merges": "\u0120Mem orial", "count": 1227, "type": "remove by frequency"} -{"id": 45399, "token": "\u0120firefighters", "merges": "\u0120firef ighters", "count": 1228, "type": "remove by frequency"} -{"id": 36802, "token": "InstanceState", "merges": "Instance State", "count": 1228, "type": "remove by frequency"} -{"id": 40674, "token": "\u0120syringe", "merges": "\u0120sy ringe", "count": 1228, "type": "remove by frequency"} -{"id": 40493, "token": "\u0120helical", "merges": "\u0120hel ical", "count": 1228, "type": "remove by frequency"} -{"id": 39309, "token": "\u0120pissed", "merges": "\u0120piss ed", "count": 1228, "type": "remove by frequency"} -{"id": 38402, "token": "carbox", "merges": "car box", "count": 1228, "type": "remove by frequency"} -{"id": 43729, "token": "\u0120Lands", "merges": "\u0120L ands", "count": 1228, "type": "remove by frequency"} -{"id": 31721, "token": "\u0120Shir", "merges": "\u0120Sh ir", "count": 1228, "type": "remove by frequency"} -{"id": 22948, "token": "\u0120Ern", "merges": "\u0120Er n", "count": 1228, "type": "remove by frequency"} -{"id": 27527, "token": "CUSS", "merges": "C USS", "count": 1228, "type": "remove by frequency"} -{"id": 38619, "token": "\u00e0\u00a5\u0123", "merges": "\u00e0\u00a5 \u0123", "count": 1228, "type": "remove by frequency"} -{"id": 46290, "token": "\u0120\u00c2\u00a1", "merges": "\u0120\u00c2 \u00a1", "count": 1228, "type": "remove by frequency"} -{"id": 44324, "token": "\u0120quenching", "merges": "\u0120qu enching", "count": 1229, "type": "remove by frequency"} -{"id": 42024, "token": "\u0120litig", "merges": "\u0120lit ig", "count": 1229, "type": "remove by frequency"} -{"id": 48098, "token": "\u0120\u00d0\u00bb\u00d1\u0130", "merges": "\u0120\u00d0\u00bb \u00d1\u0130", "count": 1229, "type": "remove by frequency"} -{"id": 7524, "token": "\u0120{{\\", "merges": "\u0120{ {\\", "count": 1229, "type": "remove by frequency"} -{"id": 13803, "token": "\u0120\u00ce\u0136", "merges": "\u0120\u00ce \u0136", "count": 1229, "type": "remove by frequency"} -{"id": 23969, "token": "\u0120\u00e0\u00a6", "merges": "\u0120 \u00e0\u00a6", "count": 1229, "type": "remove by frequency"} -{"id": 11643, "token": "\u0120Harry", "merges": "\u0120Har ry", "count": 1230, "type": "remove by frequency"} -{"id": 44595, "token": "TIOC", "merges": "TI OC", "count": 1230, "type": "remove by frequency"} -{"id": 18823, "token": "\u0120Jenn", "merges": "\u0120J enn", "count": 1231, "type": "remove by frequency"} -{"id": 19329, "token": "\u0120KIND", "merges": "\u0120K IND", "count": 1231, "type": "remove by frequency"} -{"id": 41877, "token": "\u0120faut", "merges": "\u0120f aut", "count": 1231, "type": "remove by frequency"} -{"id": 35160, "token": "\u0120Gor", "merges": "\u0120G or", "count": 1231, "type": "remove by frequency"} -{"id": 9028, "token": "\u0120\u00ce\u00ba", "merges": "\u0120\u00ce \u00ba", "count": 1231, "type": "remove by frequency"} -{"id": 24468, "token": "\u0120inhibiting", "merges": "\u0120inhib iting", "count": 1232, "type": "remove by frequency"} -{"id": 35491, "token": "\u0120feminine", "merges": "\u0120fem inine", "count": 1232, "type": "remove by frequency"} -{"id": 43479, "token": "\u0120Pepper", "merges": "\u0120Pe pper", "count": 1232, "type": "remove by frequency"} -{"id": 39727, "token": "\u00d9\u012a\u00d8\u00b1", "merges": "\u00d9\u012a \u00d8\u00b1", "count": 1232, "type": "remove by frequency"} -{"id": 33472, "token": "\u00c3\u00bcl", "merges": "\u00c3\u00bc l", "count": 1232, "type": "remove by frequency"} -{"id": 40366, "token": "\u00e0\u00b9\u0126", "merges": "\u00e0\u00b9 \u0126", "count": 1232, "type": "remove by frequency"} -{"id": 26773, "token": "\u0120radiotherapy", "merges": "\u0120radi otherapy", "count": 1233, "type": "remove by frequency"} -{"id": 44572, "token": "asac", "merges": "as ac", "count": 1233, "type": "remove by frequency"} -{"id": 25120, "token": "\u0120NSA", "merges": "\u0120N SA", "count": 1233, "type": "remove by frequency"} -{"id": 31467, "token": "Bigg", "merges": "Big g", "count": 1233, "type": "remove by frequency"} -{"id": 7974, "token": "\u00c3\u0124", "merges": "\u00c3 \u0124", "count": 1233, "type": "remove by frequency"} -{"id": 45378, "token": "\u0120medalists", "merges": "\u0120medal ists", "count": 1234, "type": "remove by frequency"} -{"id": 28193, "token": "\u0120starring", "merges": "\u0120star ring", "count": 1234, "type": "remove by frequency"} -{"id": 46693, "token": "\u0120Dollar", "merges": "\u0120D ollar", "count": 1234, "type": "remove by frequency"} -{"id": 11308, "token": "\u0120Greek", "merges": "\u0120G reek", "count": 1234, "type": "remove by frequency"} -{"id": 49079, "token": "\u0120BACKGROUND", "merges": "\u0120BACK GROUND", "count": 1235, "type": "remove by frequency"} -{"id": 22758, "token": "\u0120Heaven", "merges": "\u0120He aven", "count": 1235, "type": "remove by frequency"} -{"id": 31529, "token": "\u0120Loren", "merges": "\u0120L oren", "count": 1235, "type": "remove by frequency"} -{"id": 43050, "token": "\u0120Dj", "merges": "\u0120D j", "count": 1235, "type": "remove by frequency"} -{"id": 40966, "token": "\u0120Magnetic", "merges": "\u0120M agnetic", "count": 1236, "type": "remove by frequency"} -{"id": 35955, "token": "\u0120../../", "merges": "\u0120 ../../", "count": 1236, "type": "remove by frequency"} -{"id": 49446, "token": "\u0120confl", "merges": "\u0120con fl", "count": 1236, "type": "remove by frequency"} -{"id": 45589, "token": "\u0120characterizes", "merges": "\u0120character izes", "count": 1237, "type": "remove by frequency"} -{"id": 37817, "token": "\u0120Leadership", "merges": "\u0120Lead ership", "count": 1237, "type": "remove by frequency"} -{"id": 41403, "token": "\u0120insanity", "merges": "\u0120ins anity", "count": 1237, "type": "remove by frequency"} -{"id": 36690, "token": "\u0120branched", "merges": "\u0120branc hed", "count": 1237, "type": "remove by frequency"} -{"id": 49897, "token": "ostatin", "merges": "ost atin", "count": 1237, "type": "remove by frequency"} -{"id": 35140, "token": "\u0120apo", "merges": "\u0120ap o", "count": 1237, "type": "remove by frequency"} -{"id": 33225, "token": "\u00e0\u00a6\u00be", "merges": "\u00e0\u00a6 \u00be", "count": 1237, "type": "remove by frequency"} -{"id": 48591, "token": "Asked", "merges": "As ked", "count": 1238, "type": "remove by frequency"} -{"id": 13771, "token": "\u0120Rather", "merges": "\u0120R ather", "count": 1239, "type": "remove by frequency"} -{"id": 35500, "token": "\u0120pacing", "merges": "\u0120p acing", "count": 1239, "type": "remove by frequency"} -{"id": 34392, "token": "\u0120RSV", "merges": "\u0120R SV", "count": 1239, "type": "remove by frequency"} -{"id": 43389, "token": "\u0120\u00c2\u00a5", "merges": "\u0120\u00c2 \u00a5", "count": 1239, "type": "remove by frequency"} -{"id": 48491, "token": "\u0120pessoas", "merges": "\u0120pesso as", "count": 1240, "type": "remove by frequency"} -{"id": 48822, "token": "\u0120playful", "merges": "\u0120play ful", "count": 1240, "type": "remove by frequency"} -{"id": 44974, "token": "\u0120shouts", "merges": "\u0120sh outs", "count": 1240, "type": "remove by frequency"} -{"id": 49451, "token": "\u0120Submitted", "merges": "\u0120Sub mitted", "count": 1241, "type": "remove by frequency"} -{"id": 47223, "token": "\u0120unpaired", "merges": "\u0120un paired", "count": 1241, "type": "remove by frequency"} -{"id": 35158, "token": "apopt", "merges": "ap opt", "count": 1241, "type": "remove by frequency"} -{"id": 19619, "token": "\u0120Okay", "merges": "\u0120O kay", "count": 1241, "type": "remove by frequency"} -{"id": 49323, "token": "\u0120\u00d0\u00b2\u00d0\u00be", "merges": "\u0120\u00d0\u00b2 \u00d0\u00be", "count": 1241, "type": "remove by frequency"} -{"id": 36702, "token": "\u0120Kot", "merges": "\u0120K ot", "count": 1241, "type": "remove by frequency"} -{"id": 49569, "token": "\u0120flares", "merges": "\u0120fl ares", "count": 1242, "type": "remove by frequency"} -{"id": 11301, "token": "\u0120Bush", "merges": "\u0120B ush", "count": 1242, "type": "remove by frequency"} -{"id": 30912, "token": "\u0120Stre", "merges": "\u0120St re", "count": 1242, "type": "remove by frequency"} -{"id": 46823, "token": "\u00e2\u0122\u0136'", "merges": "\u00e2\u0122\u0136 '", "count": 1242, "type": "remove by frequency"} -{"id": 32133, "token": "\u0120Eisen", "merges": "\u0120E isen", "count": 1243, "type": "remove by frequency"} -{"id": 26417, "token": "\u0120Nar", "merges": "\u0120N ar", "count": 1243, "type": "remove by frequency"} -{"id": 43074, "token": "\u0120mattered", "merges": "\u0120mat tered", "count": 1244, "type": "remove by frequency"} -{"id": 42694, "token": "atase", "merges": "at ase", "count": 1244, "type": "remove by frequency"} -{"id": 34381, "token": "\u0120gew", "merges": "\u0120g ew", "count": 1244, "type": "remove by frequency"} -{"id": 40491, "token": "\u0120halogen", "merges": "\u0120hal ogen", "count": 1245, "type": "remove by frequency"} -{"id": 35677, "token": "\u0120Stern", "merges": "\u0120S tern", "count": 1245, "type": "remove by frequency"} -{"id": 48397, "token": "\u0120contrasted", "merges": "\u0120contrast ed", "count": 1246, "type": "remove by frequency"} -{"id": 39377, "token": "\u0120speculated", "merges": "\u0120spec ulated", "count": 1246, "type": "remove by frequency"} -{"id": 49358, "token": "\u0120monstrous", "merges": "\u0120mon strous", "count": 1246, "type": "remove by frequency"} -{"id": 28200, "token": "\u0120Maxwell", "merges": "\u0120Max well", "count": 1246, "type": "remove by frequency"} -{"id": 44074, "token": "\u0120Lights", "merges": "\u0120L ights", "count": 1246, "type": "remove by frequency"} -{"id": 41196, "token": "\u0120Falcon", "merges": "\u0120Fal con", "count": 1246, "type": "remove by frequency"} -{"id": 48195, "token": "\u0120SMC", "merges": "\u0120S MC", "count": 1246, "type": "remove by frequency"} -{"id": 34342, "token": "dimethyl", "merges": "dim ethyl", "count": 1247, "type": "remove by frequency"} -{"id": 32904, "token": "\u0120Plasma", "merges": "\u0120Pl asma", "count": 1248, "type": "remove by frequency"} -{"id": 44676, "token": "\u0120twists", "merges": "\u0120tw ists", "count": 1248, "type": "remove by frequency"} -{"id": 45385, "token": "\u0120Skype", "merges": "\u0120Sk ype", "count": 1248, "type": "remove by frequency"} -{"id": 35439, "token": "Lemma", "merges": "Lem ma", "count": 1248, "type": "remove by frequency"} -{"id": 16329, "token": "\u0120susceptibility", "merges": "\u0120suscept ibility", "count": 1249, "type": "remove by frequency"} -{"id": 48504, "token": "\u0120excused", "merges": "\u0120exc used", "count": 1249, "type": "remove by frequency"} -{"id": 43835, "token": "Whenever", "merges": "Whe never", "count": 1249, "type": "remove by frequency"} -{"id": 17650, "token": "\u0120Walter", "merges": "\u0120Wal ter", "count": 1249, "type": "remove by frequency"} -{"id": 34827, "token": "\u0120Beg", "merges": "\u0120B eg", "count": 1249, "type": "remove by frequency"} -{"id": 23124, "token": "\u0120Jur", "merges": "\u0120J ur", "count": 1249, "type": "remove by frequency"} -{"id": 32044, "token": "<_", "merges": "< _", "count": 1249, "type": "remove by frequency"} -{"id": 46672, "token": "\u0120caliber", "merges": "\u0120cal iber", "count": 1250, "type": "remove by frequency"} -{"id": 9643, "token": "\u0120Britain", "merges": "\u0120Brit ain", "count": 1250, "type": "remove by frequency"} -{"id": 31052, "token": "\u0120atyp", "merges": "\u0120at yp", "count": 1250, "type": "remove by frequency"} -{"id": 41098, "token": "\u0120Shan", "merges": "\u0120Sh an", "count": 1250, "type": "remove by frequency"} -{"id": 14566, "token": "\u0120Having", "merges": "\u0120H aving", "count": 1251, "type": "remove by frequency"} -{"id": 37477, "token": "\u0120Tong", "merges": "\u0120T ong", "count": 1251, "type": "remove by frequency"} -{"id": 31661, "token": "\u0120Cec", "merges": "\u0120C ec", "count": 1251, "type": "remove by frequency"} -{"id": 20847, "token": "\u0120skeletal", "merges": "\u0120ske letal", "count": 1252, "type": "remove by frequency"} -{"id": 21385, "token": "\u0120Rab", "merges": "\u0120R ab", "count": 1252, "type": "remove by frequency"} -{"id": 28377, "token": "\u0120Ras", "merges": "\u0120R as", "count": 1252, "type": "remove by frequency"} -{"id": 29267, "token": "\u0120dialysis", "merges": "\u0120dial ysis", "count": 1253, "type": "remove by frequency"} -{"id": 26940, "token": "\u0120Authors", "merges": "\u0120Auth ors", "count": 1253, "type": "remove by frequency"} -{"id": 28992, "token": "ophilic", "merges": "ophil ic", "count": 1253, "type": "remove by frequency"} -{"id": 41419, "token": "NotNull", "merges": "Not Null", "count": 1253, "type": "remove by frequency"} -{"id": 15685, "token": "\u0120incubation", "merges": "\u0120incub ation", "count": 1254, "type": "remove by frequency"} -{"id": 37665, "token": "\u0120Cities", "merges": "\u0120C ities", "count": 1254, "type": "remove by frequency"} -{"id": 37745, "token": "\u0120Twin", "merges": "\u0120Tw in", "count": 1254, "type": "remove by frequency"} -{"id": 40916, "token": "innen", "merges": "inn en", "count": 1254, "type": "remove by frequency"} -{"id": 38258, "token": "onyl", "merges": "on yl", "count": 1254, "type": "remove by frequency"} -{"id": 50206, "token": "\u0120groaned", "merges": "\u0120gro aned", "count": 1255, "type": "remove by frequency"} -{"id": 21789, "token": "\u0120Bah", "merges": "\u0120B ah", "count": 1256, "type": "remove by frequency"} -{"id": 17669, "token": "\u0120lui", "merges": "\u0120l ui", "count": 1256, "type": "remove by frequency"} -{"id": 26601, "token": "\u00e0\u00ac", "merges": "\u00e0 \u00ac", "count": 1256, "type": "remove by frequency"} -{"id": 31400, "token": "\u0120Schmidt", "merges": "\u0120Sch midt", "count": 1257, "type": "remove by frequency"} -{"id": 39123, "token": "\u0120Canon", "merges": "\u0120Can on", "count": 1257, "type": "remove by frequency"} -{"id": 23801, "token": "\u0120Honor", "merges": "\u0120Hon or", "count": 1257, "type": "remove by frequency"} -{"id": 49573, "token": "\u0120Chow", "merges": "\u0120Ch ow", "count": 1257, "type": "remove by frequency"} -{"id": 24965, "token": "\u0120Collabor", "merges": "\u0120Coll abor", "count": 1258, "type": "remove by frequency"} -{"id": 24538, "token": "\u0120Berkeley", "merges": "\u0120Ber keley", "count": 1258, "type": "remove by frequency"} -{"id": 19995, "token": "\u0120Murray", "merges": "\u0120Mur ray", "count": 1258, "type": "remove by frequency"} -{"id": 47270, "token": "\u0120\u00d0\u00be\u00d0\u00bd", "merges": "\u0120\u00d0\u00be \u00d0\u00bd", "count": 1258, "type": "remove by frequency"} -{"id": 23549, "token": "\u0120Dow", "merges": "\u0120D ow", "count": 1258, "type": "remove by frequency"} -{"id": 32212, "token": "\u0120Nutrition", "merges": "\u0120Nut rition", "count": 1259, "type": "remove by frequency"} -{"id": 44597, "token": "\u0120medically", "merges": "\u0120med ically", "count": 1259, "type": "remove by frequency"} -{"id": 46449, "token": "\u0120causality", "merges": "\u0120caus ality", "count": 1259, "type": "remove by frequency"} -{"id": 37645, "token": "\u0120Rapid", "merges": "\u0120Rap id", "count": 1259, "type": "remove by frequency"} -{"id": 41075, "token": "\u0120exploratory", "merges": "\u0120explor atory", "count": 1260, "type": "remove by frequency"} -{"id": 43707, "token": "\u0120joking", "merges": "\u0120j oking", "count": 1260, "type": "remove by frequency"} -{"id": 37351, "token": "\u0120Facts", "merges": "\u0120F acts", "count": 1260, "type": "remove by frequency"} -{"id": 49485, "token": "geons", "merges": "ge ons", "count": 1260, "type": "remove by frequency"} -{"id": 48060, "token": "\u0120enhancer", "merges": "\u0120enh ancer", "count": 1261, "type": "remove by frequency"} -{"id": 46375, "token": "\u0120Rainbow", "merges": "\u0120Rain bow", "count": 1261, "type": "remove by frequency"} -{"id": 49623, "token": "Soviet", "merges": "Sov iet", "count": 1261, "type": "remove by frequency"} -{"id": 18559, "token": "\u0120Jos", "merges": "\u0120J os", "count": 1261, "type": "remove by frequency"} -{"id": 50348, "token": "\u00e2\u0137\u0142", "merges": "\u00e2\u0137 \u0142", "count": 1261, "type": "remove by frequency"} -{"id": 35600, "token": "\u0120trapping", "merges": "\u0120tra pping", "count": 1262, "type": "remove by frequency"} -{"id": 30309, "token": "\u0120ECM", "merges": "\u0120E CM", "count": 1262, "type": "remove by frequency"} -{"id": 12227, "token": "=\"../../", "merges": "=\" ../../", "count": 1263, "type": "remove by frequency"} -{"id": 48978, "token": "oplasma", "merges": "opl asma", "count": 1263, "type": "remove by frequency"} -{"id": 22921, "token": "\u0120Eld", "merges": "\u0120E ld", "count": 1263, "type": "remove by frequency"} -{"id": 19804, "token": "\u0120Regional", "merges": "\u0120Reg ional", "count": 1264, "type": "remove by frequency"} -{"id": 42133, "token": "\u0120amenable", "merges": "\u0120am enable", "count": 1264, "type": "remove by frequency"} -{"id": 19810, "token": "\u0120Others", "merges": "\u0120Other s", "count": 1264, "type": "remove by frequency"} -{"id": 26995, "token": "INION", "merges": "IN ION", "count": 1264, "type": "remove by frequency"} -{"id": 18221, "token": "\u00c3\u0127", "merges": "\u00c3 \u0127", "count": 1264, "type": "remove by frequency"} -{"id": 17883, "token": "\u0120myocardial", "merges": "\u0120myocard ial", "count": 1265, "type": "remove by frequency"} -{"id": 33110, "token": "\u0120canine", "merges": "\u0120can ine", "count": 1265, "type": "remove by frequency"} -{"id": 41410, "token": "PropertyGroup", "merges": "Property Group", "count": 1266, "type": "remove by frequency"} -{"id": 35781, "token": "rivolous", "merges": "rivol ous", "count": 1266, "type": "remove by frequency"} -{"id": 30668, "token": "\u0120mascul", "merges": "\u0120mas cul", "count": 1266, "type": "remove by frequency"} -{"id": 42475, "token": "\u0120Chance", "merges": "\u0120Ch ance", "count": 1266, "type": "remove by frequency"} -{"id": 38732, "token": "\u0120hyste", "merges": "\u0120hy ste", "count": 1266, "type": "remove by frequency"} -{"id": 46119, "token": "ESULT", "merges": "ES ULT", "count": 1266, "type": "remove by frequency"} -{"id": 30672, "token": "\u0120Nas", "merges": "\u0120N as", "count": 1266, "type": "remove by frequency"} -{"id": 46674, "token": "\u00e3\u0123\u00b3", "merges": "\u00e3\u0123 \u00b3", "count": 1266, "type": "remove by frequency"} -{"id": 41776, "token": "\u0120prescribing", "merges": "\u0120pres cribing", "count": 1267, "type": "remove by frequency"} -{"id": 27867, "token": "\u0120sequenced", "merges": "\u0120sequ enced", "count": 1267, "type": "remove by frequency"} -{"id": 40737, "token": "\u0120Campus", "merges": "\u0120Camp us", "count": 1267, "type": "remove by frequency"} -{"id": 36797, "token": "\u0120Mutual", "merges": "\u0120Mut ual", "count": 1267, "type": "remove by frequency"} -{"id": 44726, "token": "\u0120Rates", "merges": "\u0120R ates", "count": 1267, "type": "remove by frequency"} -{"id": 39636, "token": "\u00e3\u0124\u0124\u00e3\u0123\u00ae", "merges": "\u00e3\u0124\u0124 \u00e3\u0123\u00ae", "count": 1267, "type": "remove by frequency"} -{"id": 50372, "token": "\u00e2\u0139\u0129", "merges": "\u00e2\u0139 \u0129", "count": 1267, "type": "remove by frequency"} -{"id": 39216, "token": "\u0120granules", "merges": "\u0120gran ules", "count": 1268, "type": "remove by frequency"} -{"id": 52248, "token": "\u00e6\u0140\u00b1", "merges": "\u00e6\u0140 \u00b1", "count": 1268, "type": "remove by frequency"} -{"id": 46918, "token": "Qaeda", "merges": "Q aeda", "count": 1269, "type": "remove by frequency"} -{"id": 15088, "token": "\u0120secretion", "merges": "\u0120secret ion", "count": 1270, "type": "remove by frequency"} -{"id": 50352, "token": "\u00e2\u0137\u00b0", "merges": "\u00e2\u0137 \u00b0", "count": 1270, "type": "remove by frequency"} -{"id": 21706, "token": "\u0120Hills", "merges": "\u0120H ills", "count": 1271, "type": "remove by frequency"} -{"id": 29741, "token": "\u00e3\u0123\u00a7\u00e3\u0123\u00af", "merges": "\u00e3\u0123\u00a7 \u00e3\u0123\u00af", "count": 1271, "type": "remove by frequency"} -{"id": 22406, "token": "ocarcin", "merges": "oc arcin", "count": 1272, "type": "remove by frequency"} -{"id": 45700, "token": "\u0120vibe", "merges": "\u0120v ibe", "count": 1272, "type": "remove by frequency"} -{"id": 45719, "token": ",'\"", "merges": ",' \"", "count": 1272, "type": "remove by frequency"} -{"id": 44880, "token": "\u0120therapists", "merges": "\u0120therap ists", "count": 1273, "type": "remove by frequency"} -{"id": 44696, "token": "\u0120hydroxide", "merges": "\u0120hydrox ide", "count": 1273, "type": "remove by frequency"} -{"id": 21872, "token": "\u0120Perry", "merges": "\u0120Per ry", "count": 1273, "type": "remove by frequency"} -{"id": 44892, "token": "\u0120Blind", "merges": "\u0120Bl ind", "count": 1273, "type": "remove by frequency"} -{"id": 29880, "token": "\u0120Pont", "merges": "\u0120P ont", "count": 1273, "type": "remove by frequency"} -{"id": 48167, "token": "][^", "merges": "] [^", "count": 1273, "type": "remove by frequency"} -{"id": 48306, "token": "\u0120formaldehyde", "merges": "\u0120formal dehyde", "count": 1274, "type": "remove by frequency"} -{"id": 50104, "token": "\u0120dramat", "merges": "\u0120dram at", "count": 1274, "type": "remove by frequency"} -{"id": 48230, "token": "\u0120spong", "merges": "\u0120sp ong", "count": 1274, "type": "remove by frequency"} -{"id": 21954, "token": "\u0120Abd", "merges": "\u0120Ab d", "count": 1274, "type": "remove by frequency"} -{"id": 43135, "token": "\u0120SES", "merges": "\u0120S ES", "count": 1274, "type": "remove by frequency"} -{"id": 44220, "token": "\u0120Treasure", "merges": "\u0120Tre asure", "count": 1275, "type": "remove by frequency"} -{"id": 7814, "token": "\u0120Saturday", "merges": "\u0120S aturday", "count": 1275, "type": "remove by frequency"} -{"id": 23665, "token": "\u00c5\u00af", "merges": "\u00c5 \u00af", "count": 1276, "type": "remove by frequency"} -{"id": 33344, "token": "\u0120tunneling", "merges": "\u0120tunn eling", "count": 1277, "type": "remove by frequency"} -{"id": 28013, "token": "astolic", "merges": "ast olic", "count": 1277, "type": "remove by frequency"} -{"id": 27907, "token": "\u0120Buch", "merges": "\u0120B uch", "count": 1277, "type": "remove by frequency"} -{"id": 49023, "token": "\u0120mysq", "merges": "\u0120my sq", "count": 1277, "type": "remove by frequency"} -{"id": 23583, "token": "\u0120Liberty", "merges": "\u0120Liber ty", "count": 1278, "type": "remove by frequency"} -{"id": 38570, "token": "\u0120shoved", "merges": "\u0120sh oved", "count": 1278, "type": "remove by frequency"} -{"id": 22759, "token": "\u0120dairy", "merges": "\u0120d airy", "count": 1278, "type": "remove by frequency"} -{"id": 30739, "token": "\u0120Hir", "merges": "\u0120H ir", "count": 1278, "type": "remove by frequency"} -{"id": 21246, "token": "\u0120Stadium", "merges": "\u0120St adium", "count": 1279, "type": "remove by frequency"} -{"id": 46132, "token": "\u0120diffus", "merges": "\u0120diff us", "count": 1279, "type": "remove by frequency"} -{"id": 39942, "token": "\u0120Tub", "merges": "\u0120T ub", "count": 1279, "type": "remove by frequency"} -{"id": 28318, "token": "\u0120Fol", "merges": "\u0120F ol", "count": 1279, "type": "remove by frequency"} -{"id": 49646, "token": "\u0120swallowing", "merges": "\u0120swallow ing", "count": 1280, "type": "remove by frequency"} -{"id": 50193, "token": "\u0120ventured", "merges": "\u0120vent ured", "count": 1280, "type": "remove by frequency"} -{"id": 39365, "token": "\u0120Syndrome", "merges": "\u0120Synd rome", "count": 1280, "type": "remove by frequency"} -{"id": 30902, "token": "\u0120Stories", "merges": "\u0120St ories", "count": 1280, "type": "remove by frequency"} -{"id": 41434, "token": "cardia", "merges": "card ia", "count": 1280, "type": "remove by frequency"} -{"id": 26608, "token": "\u0120Camb", "merges": "\u0120C amb", "count": 1280, "type": "remove by frequency"} -{"id": 41452, "token": "\u0120Milk", "merges": "\u0120Mil k", "count": 1280, "type": "remove by frequency"} -{"id": 46792, "token": "\u0120Skills", "merges": "\u0120Sk ills", "count": 1281, "type": "remove by frequency"} -{"id": 17446, "token": "\u0120Ted", "merges": "\u0120T ed", "count": 1281, "type": "remove by frequency"} -{"id": 27081, "token": "}/\\", "merges": "} /\\", "count": 1281, "type": "remove by frequency"} -{"id": 38591, "token": "\u00e3\u0123\u0124\u00e3\u0124\u012c", "merges": "\u00e3\u0123\u0124 \u00e3\u0124\u012c", "count": 1282, "type": "remove by frequency"} -{"id": 37621, "token": "\u0120Cock", "merges": "\u0120C ock", "count": 1282, "type": "remove by frequency"} -{"id": 49546, "token": "\u0120Anc", "merges": "\u0120An c", "count": 1282, "type": "remove by frequency"} -{"id": 31788, "token": "\u0120nicotine", "merges": "\u0120nic otine", "count": 1283, "type": "remove by frequency"} -{"id": 36239, "token": "\u0120Lenn", "merges": "\u0120L enn", "count": 1283, "type": "remove by frequency"} -{"id": 27402, "token": "\u0120Gol", "merges": "\u0120G ol", "count": 1283, "type": "remove by frequency"} -{"id": 50124, "token": "\u0120insulting", "merges": "\u0120insult ing", "count": 1284, "type": "remove by frequency"} -{"id": 37914, "token": "\u0120whipped", "merges": "\u0120wh ipped", "count": 1284, "type": "remove by frequency"} -{"id": 43647, "token": "\u0120Broadcasting", "merges": "\u0120Broad casting", "count": 1285, "type": "remove by frequency"} -{"id": 47818, "token": "\u0120pelvis", "merges": "\u0120pel vis", "count": 1285, "type": "remove by frequency"} -{"id": 20291, "token": "\u0120Isa", "merges": "\u0120Is a", "count": 1285, "type": "remove by frequency"} -{"id": 52396, "token": "\u00e6\u0143\u0135", "merges": "\u00e6\u0143 \u0135", "count": 1285, "type": "remove by frequency"} -{"id": 17567, "token": "\\{\\", "merges": "\\ {\\", "count": 1285, "type": "remove by frequency"} -{"id": 48528, "token": "\u0120depiction", "merges": "\u0120dep iction", "count": 1287, "type": "remove by frequency"} -{"id": 33245, "token": "\u0120Cele", "merges": "\u0120C ele", "count": 1287, "type": "remove by frequency"} -{"id": 40147, "token": "\u0120spectrophot", "merges": "\u0120spect rophot", "count": 1288, "type": "remove by frequency"} -{"id": 44951, "token": "\u00d0\u00be\u00d0\u00b3\u00d0\u00b4\u00d0\u00b0", "merges": "\u00d0\u00be\u00d0\u00b3 \u00d0\u00b4\u00d0\u00b0", "count": 1288, "type": "remove by frequency"} -{"id": 37247, "token": "\u0120Weber", "merges": "\u0120We ber", "count": 1288, "type": "remove by frequency"} -{"id": 33655, "token": "\u0120Shin", "merges": "\u0120Sh in", "count": 1288, "type": "remove by frequency"} -{"id": 44589, "token": "\u0120Une", "merges": "\u0120U ne", "count": 1288, "type": "remove by frequency"} -{"id": 39609, "token": "\u0120instincts", "merges": "\u0120instinct s", "count": 1289, "type": "remove by frequency"} -{"id": 27282, "token": "\u0120Politics", "merges": "\u0120Polit ics", "count": 1289, "type": "remove by frequency"} -{"id": 35023, "token": "ar\u00c3\u00a9", "merges": "ar \u00c3\u00a9", "count": 1289, "type": "remove by frequency"} -{"id": 22816, "token": "\u0120irreducible", "merges": "\u0120ir reducible", "count": 1290, "type": "remove by frequency"} -{"id": 42402, "token": "\u0120midfielder", "merges": "\u0120mid fielder", "count": 1290, "type": "remove by frequency"} -{"id": 37990, "token": "ferroni", "merges": "ferr oni", "count": 1290, "type": "remove by frequency"} -{"id": 49151, "token": "\u00e0\u00b8\u012a", "merges": "\u00e0\u00b8 \u012a", "count": 1290, "type": "remove by frequency"} -{"id": 48913, "token": "\u0120biodegrad", "merges": "\u0120bi odegrad", "count": 1291, "type": "remove by frequency"} -{"id": 18673, "token": "\u0120Bruce", "merges": "\u0120Bru ce", "count": 1291, "type": "remove by frequency"} -{"id": 20567, "token": "\u0120Kings", "merges": "\u0120K ings", "count": 1291, "type": "remove by frequency"} -{"id": 21871, "token": "\u0120thym", "merges": "\u0120th ym", "count": 1291, "type": "remove by frequency"} -{"id": 19888, "token": "\u0120\u00c5\u00be", "merges": "\u0120\u00c5 \u00be", "count": 1291, "type": "remove by frequency"} -{"id": 41976, "token": "\u0120Journey", "merges": "\u0120Jour ney", "count": 1292, "type": "remove by frequency"} -{"id": 49316, "token": "ICAg", "merges": "IC Ag", "count": 1292, "type": "remove by frequency"} -{"id": 34190, "token": "\u0120transfusion", "merges": "\u0120trans fusion", "count": 1293, "type": "remove by frequency"} -{"id": 42207, "token": "\u0120bacterium", "merges": "\u0120bacter ium", "count": 1293, "type": "remove by frequency"} -{"id": 46698, "token": "\u0120prolific", "merges": "\u0120prol ific", "count": 1293, "type": "remove by frequency"} -{"id": 7671, "token": "\u0120Federal", "merges": "\u0120F ederal", "count": 1293, "type": "remove by frequency"} -{"id": 44926, "token": "\u0120SUMMARY", "merges": "\u0120SUM MARY", "count": 1293, "type": "remove by frequency"} -{"id": 46805, "token": "\u0120forts", "merges": "\u0120for ts", "count": 1293, "type": "remove by frequency"} -{"id": 31292, "token": "\u0120Kun", "merges": "\u0120K un", "count": 1293, "type": "remove by frequency"} -{"id": 32320, "token": "isi\u00c3\u00b3n", "merges": "is i\u00c3\u00b3n", "count": 1294, "type": "remove by frequency"} -{"id": 35175, "token": "\u0120Shannon", "merges": "\u0120Sh annon", "count": 1295, "type": "remove by frequency"} -{"id": 47395, "token": "\u0120slated", "merges": "\u0120sl ated", "count": 1295, "type": "remove by frequency"} -{"id": 11725, "token": "\u0120kinase", "merges": "\u0120kin ase", "count": 1295, "type": "remove by frequency"} -{"id": 31958, "token": "\u0120Biology", "merges": "\u0120Bi ology", "count": 1296, "type": "remove by frequency"} -{"id": 32677, "token": "\u0120rins", "merges": "\u0120r ins", "count": 1296, "type": "remove by frequency"} -{"id": 47114, "token": "\u0120IOP", "merges": "\u0120I OP", "count": 1296, "type": "remove by frequency"} -{"id": 43986, "token": "\u0120redshifts", "merges": "\u0120red shifts", "count": 1297, "type": "remove by frequency"} -{"id": 39469, "token": "\u0120microbes", "merges": "\u0120micro bes", "count": 1297, "type": "remove by frequency"} -{"id": 49961, "token": "\u0120conjunct", "merges": "\u0120conj unct", "count": 1298, "type": "remove by frequency"} -{"id": 17885, "token": "\u0120Douglas", "merges": "\u0120Doug las", "count": 1298, "type": "remove by frequency"} -{"id": 7650, "token": "\u0120Appe", "merges": "\u0120Ap pe", "count": 1298, "type": "remove by frequency"} -{"id": 34594, "token": "\u0120XV", "merges": "\u0120X V", "count": 1298, "type": "remove by frequency"} -{"id": 30259, "token": "\u0120exhibiting", "merges": "\u0120exhib iting", "count": 1299, "type": "remove by frequency"} -{"id": 49537, "token": "\u0120Qing", "merges": "\u0120Q ing", "count": 1299, "type": "remove by frequency"} -{"id": 35779, "token": "\u0120eds", "merges": "\u0120ed s", "count": 1299, "type": "remove by frequency"} -{"id": 47541, "token": "\u0120inclusions", "merges": "\u0120in clusions", "count": 1300, "type": "remove by frequency"} -{"id": 34463, "token": "\u0120HSP", "merges": "\u0120H SP", "count": 1300, "type": "remove by frequency"} -{"id": 43899, "token": "\u0120Ub", "merges": "\u0120U b", "count": 1300, "type": "remove by frequency"} -{"id": 19663, "token": "\u0120soluble", "merges": "\u0120sol uble", "count": 1301, "type": "remove by frequency"} -{"id": 46272, "token": "\u0120concede", "merges": "\u0120conced e", "count": 1301, "type": "remove by frequency"} -{"id": 35992, "token": "\u0120Fur", "merges": "\u0120F ur", "count": 1301, "type": "remove by frequency"} -{"id": 45927, "token": "\u0120cheering", "merges": "\u0120che ering", "count": 1302, "type": "remove by frequency"} -{"id": 40534, "token": "\u0120Yo", "merges": "\u0120Y o", "count": 1302, "type": "remove by frequency"} -{"id": 39947, "token": "\u0120vertebra", "merges": "\u0120verte bra", "count": 1303, "type": "remove by frequency"} -{"id": 34000, "token": "\u0120immature", "merges": "\u0120imm ature", "count": 1303, "type": "remove by frequency"} -{"id": 25298, "token": "\u0120Pierre", "merges": "\u0120Pier re", "count": 1303, "type": "remove by frequency"} -{"id": 41371, "token": "\u0120Parts", "merges": "\u0120Part s", "count": 1303, "type": "remove by frequency"} -{"id": 31812, "token": "\u0120isso", "merges": "\u0120is so", "count": 1303, "type": "remove by frequency"} -{"id": 44793, "token": "arynx", "merges": "aryn x", "count": 1303, "type": "remove by frequency"} -{"id": 20699, "token": "\u0120Hus", "merges": "\u0120H us", "count": 1303, "type": "remove by frequency"} -{"id": 43073, "token": "\u0120differentiating", "merges": "\u0120different iating", "count": 1304, "type": "remove by frequency"} -{"id": 28870, "token": "\u0120nuest", "merges": "\u0120nu est", "count": 1304, "type": "remove by frequency"} -{"id": 15273, "token": "\u0120Kevin", "merges": "\u0120K evin", "count": 1304, "type": "remove by frequency"} -{"id": 22125, "token": "i\u00c3\u00b3", "merges": "i \u00c3\u00b3", "count": 1304, "type": "remove by frequency"} -{"id": 46553, "token": "\u0120Structural", "merges": "\u0120Struct ural", "count": 1305, "type": "remove by frequency"} -{"id": 31881, "token": "\u0120Inspector", "merges": "\u0120Ins pector", "count": 1305, "type": "remove by frequency"} -{"id": 39884, "token": "\u0120degli", "merges": "\u0120deg li", "count": 1305, "type": "remove by frequency"} -{"id": 30576, "token": "\u0120Parl", "merges": "\u0120Par l", "count": 1305, "type": "remove by frequency"} -{"id": 43806, "token": "ycin", "merges": "y cin", "count": 1305, "type": "remove by frequency"} -{"id": 26425, "token": "\u0120broth", "merges": "\u0120bro th", "count": 1306, "type": "remove by frequency"} -{"id": 22757, "token": "\u0120Guy", "merges": "\u0120Gu y", "count": 1307, "type": "remove by frequency"} -{"id": 18626, "token": "\u00e2\u0122\u00b3", "merges": "\u00e2\u0122 \u00b3", "count": 1308, "type": "remove by frequency"} -{"id": 31103, "token": "earchers", "merges": "ear chers", "count": 1309, "type": "remove by frequency"} -{"id": 12123, "token": "\u0120Zealand", "merges": "\u0120Ze aland", "count": 1309, "type": "remove by frequency"} -{"id": 40414, "token": "\u0120tipped", "merges": "\u0120t ipped", "count": 1309, "type": "remove by frequency"} -{"id": 23555, "token": "\u0120Holmes", "merges": "\u0120Hol mes", "count": 1309, "type": "remove by frequency"} -{"id": 39054, "token": "\u0120Ez", "merges": "\u0120E z", "count": 1309, "type": "remove by frequency"} -{"id": 39316, "token": "\u0120famously", "merges": "\u0120fam ously", "count": 1310, "type": "remove by frequency"} -{"id": 35084, "token": "\u0120Silva", "merges": "\u0120Sil va", "count": 1310, "type": "remove by frequency"} -{"id": 28322, "token": "\u0120Ori", "merges": "\u0120O ri", "count": 1310, "type": "remove by frequency"} -{"id": 34411, "token": "\u0120Lov", "merges": "\u0120L ov", "count": 1310, "type": "remove by frequency"} -{"id": 24532, "token": "\u0120schizophrenia", "merges": "\u0120schizophren ia", "count": 1311, "type": "remove by frequency"} -{"id": 9883, "token": "\u0120Spanish", "merges": "\u0120Sp anish", "count": 1311, "type": "remove by frequency"} -{"id": 47371, "token": "\u0120obliter", "merges": "\u0120ob liter", "count": 1311, "type": "remove by frequency"} -{"id": 42384, "token": "addClass", "merges": "add Class", "count": 1311, "type": "remove by frequency"} -{"id": 16553, "token": "\u0120Jason", "merges": "\u0120J ason", "count": 1311, "type": "remove by frequency"} -{"id": 29800, "token": "rotic", "merges": "ro tic", "count": 1311, "type": "remove by frequency"} -{"id": 44366, "token": "\u00c3\u0141e", "merges": "\u00c3\u0141 e", "count": 1311, "type": "remove by frequency"} -{"id": 43067, "token": "----------------------------------------------------------------------", "merges": "---------------------------------------------------------------- ------", "count": 1312, "type": "remove by frequency"} -{"id": 32572, "token": "\u0120sufficiency", "merges": "\u0120su fficiency", "count": 1312, "type": "remove by frequency"} -{"id": 20169, "token": "\u0120isomorphism", "merges": "\u0120is omorphism", "count": 1312, "type": "remove by frequency"} -{"id": 19081, "token": "\u0120inoc", "merges": "\u0120in oc", "count": 1312, "type": "remove by frequency"} -{"id": 41336, "token": "\u0120dengue", "merges": "\u0120deng ue", "count": 1313, "type": "remove by frequency"} -{"id": 38569, "token": "bellum", "merges": "bell um", "count": 1313, "type": "remove by frequency"} -{"id": 19342, "token": "\u0120Griff", "merges": "\u0120Gr iff", "count": 1313, "type": "remove by frequency"} -{"id": 38493, "token": "\u0120queer", "merges": "\u0120que er", "count": 1313, "type": "remove by frequency"} -{"id": 31037, "token": "\u0120Wiley", "merges": "\u0120W iley", "count": 1313, "type": "remove by frequency"} -{"id": 23077, "token": "\u0120Thor", "merges": "\u0120Th or", "count": 1314, "type": "remove by frequency"} -{"id": 27014, "token": "\u0120Boys", "merges": "\u0120Bo ys", "count": 1315, "type": "remove by frequency"} -{"id": 48983, "token": "\u0120GOT", "merges": "\u0120G OT", "count": 1315, "type": "remove by frequency"} -{"id": 49865, "token": "\u0120contender", "merges": "\u0120cont ender", "count": 1316, "type": "remove by frequency"} -{"id": 13953, "token": "\u0120nous", "merges": "\u0120n ous", "count": 1316, "type": "remove by frequency"} -{"id": 46843, "token": "\u00e3\u0123\u0136", "merges": "\u00e3\u0123 \u0136", "count": 1316, "type": "remove by frequency"} -{"id": 26294, "token": "\u0120entanglement", "merges": "\u0120entang lement", "count": 1317, "type": "remove by frequency"} -{"id": 29084, "token": "\u0120Thought", "merges": "\u0120Th ought", "count": 1318, "type": "remove by frequency"} -{"id": 36140, "token": "\u0120anaest", "merges": "\u0120ana est", "count": 1318, "type": "remove by frequency"} -{"id": 48945, "token": "\u0120coined", "merges": "\u0120co ined", "count": 1318, "type": "remove by frequency"} -{"id": 19254, "token": "\u0120antim", "merges": "\u0120ant im", "count": 1318, "type": "remove by frequency"} -{"id": 43172, "token": "\u0120Duty", "merges": "\u0120D uty", "count": 1318, "type": "remove by frequency"} -{"id": 6413, "token": "\u0120Supreme", "merges": "\u0120Sup reme", "count": 1319, "type": "remove by frequency"} -{"id": 44580, "token": "ERTYPE", "merges": "ERT YPE", "count": 1319, "type": "remove by frequency"} -{"id": 20322, "token": "\u0120%>", "merges": "\u0120% >", "count": 1319, "type": "remove by frequency"} -{"id": 24365, "token": "\u0120implantation", "merges": "\u0120implant ation", "count": 1320, "type": "remove by frequency"} -{"id": 26492, "token": "\u0120methanol", "merges": "\u0120meth anol", "count": 1320, "type": "remove by frequency"} -{"id": 28728, "token": "\u0120analges", "merges": "\u0120anal ges", "count": 1320, "type": "remove by frequency"} -{"id": 32761, "token": "\u0120Above", "merges": "\u0120Ab ove", "count": 1320, "type": "remove by frequency"} -{"id": 20069, "token": "\u0120Hend", "merges": "\u0120H end", "count": 1320, "type": "remove by frequency"} -{"id": 23302, "token": "\u0120Mack", "merges": "\u0120M ack", "count": 1320, "type": "remove by frequency"} -{"id": 43502, "token": "\u0120inventors", "merges": "\u0120invent ors", "count": 1321, "type": "remove by frequency"} -{"id": 47921, "token": "\u0120trilogy", "merges": "\u0120tr ilogy", "count": 1321, "type": "remove by frequency"} -{"id": 16863, "token": "\u0120Carter", "merges": "\u0120Car ter", "count": 1321, "type": "remove by frequency"} -{"id": 28970, "token": "\u0120MeV", "merges": "\u0120Me V", "count": 1321, "type": "remove by frequency"} -{"id": 41820, "token": "\u0120visitation", "merges": "\u0120vis itation", "count": 1322, "type": "remove by frequency"} -{"id": 44483, "token": "oarthritis", "merges": "oarth ritis", "count": 1322, "type": "remove by frequency"} -{"id": 15893, "token": "\u0120teaspoon", "merges": "\u0120teasp oon", "count": 1322, "type": "remove by frequency"} -{"id": 16985, "token": "////////////////////////////////", "merges": "//////////////// ////////////////", "count": 1323, "type": "remove by frequency"} -{"id": 38240, "token": "\u0120roasted", "merges": "\u0120ro asted", "count": 1323, "type": "remove by frequency"} -{"id": 45996, "token": "idently", "merges": "ident ly", "count": 1323, "type": "remove by frequency"} -{"id": 14012, "token": "\u0120Ark", "merges": "\u0120Ar k", "count": 1323, "type": "remove by frequency"} -{"id": 28819, "token": "\u0120spatially", "merges": "\u0120spat ially", "count": 1324, "type": "remove by frequency"} -{"id": 30815, "token": "\u0120Hass", "merges": "\u0120H ass", "count": 1324, "type": "remove by frequency"} -{"id": 47067, "token": "\u0120NIR", "merges": "\u0120N IR", "count": 1324, "type": "remove by frequency"} -{"id": 46694, "token": "].\\", "merges": "]. \\", "count": 1324, "type": "remove by frequency"} -{"id": 44841, "token": "izaci\u00c3\u00b3n", "merges": "iz aci\u00c3\u00b3n", "count": 1325, "type": "remove by frequency"} -{"id": 38236, "token": "\u0120Sang", "merges": "\u0120S ang", "count": 1325, "type": "remove by frequency"} -{"id": 30473, "token": "\u0120Abr", "merges": "\u0120A br", "count": 1325, "type": "remove by frequency"} -{"id": 27377, "token": "\u0120inferences", "merges": "\u0120in ferences", "count": 1326, "type": "remove by frequency"} -{"id": 39451, "token": "\u0120softened", "merges": "\u0120soft ened", "count": 1326, "type": "remove by frequency"} -{"id": 38504, "token": "\u0120skeptic", "merges": "\u0120ske ptic", "count": 1326, "type": "remove by frequency"} -{"id": 23588, "token": "\u0120Male", "merges": "\u0120M ale", "count": 1326, "type": "remove by frequency"} -{"id": 47097, "token": "irled", "merges": "ir led", "count": 1327, "type": "remove by frequency"} -{"id": 29047, "token": "\u0120Cour", "merges": "\u0120C our", "count": 1327, "type": "remove by frequency"} -{"id": 29999, "token": "\u0120dismissing", "merges": "\u0120dismiss ing", "count": 1328, "type": "remove by frequency"} -{"id": 11137, "token": "\u0120Scot", "merges": "\u0120Sc ot", "count": 1328, "type": "remove by frequency"} -{"id": 45186, "token": "\u0120aliment", "merges": "\u0120al iment", "count": 1329, "type": "remove by frequency"} -{"id": 47434, "token": "\u00e3\u0125\u012c", "merges": "\u00e3\u0125 \u012c", "count": 1329, "type": "remove by frequency"} -{"id": 21066, "token": "\u0120synaptic", "merges": "\u0120syn aptic", "count": 1330, "type": "remove by frequency"} -{"id": 30280, "token": "\u0120TEX", "merges": "\u0120T EX", "count": 1330, "type": "remove by frequency"} -{"id": 36056, "token": "\u0120--------------------------", "merges": "\u0120---------------- ----------", "count": 1331, "type": "remove by frequency"} -{"id": 44803, "token": "\u0120storyline", "merges": "\u0120story line", "count": 1331, "type": "remove by frequency"} -{"id": 17678, "token": "\u0120primers", "merges": "\u0120prim ers", "count": 1331, "type": "remove by frequency"} -{"id": 15699, "token": "\u0120Overall", "merges": "\u0120Over all", "count": 1331, "type": "remove by frequency"} -{"id": 49719, "token": "\u0120Awesome", "merges": "\u0120Aw esome", "count": 1331, "type": "remove by frequency"} -{"id": 48253, "token": "\u0120Ment", "merges": "\u0120M ent", "count": 1331, "type": "remove by frequency"} -{"id": 41455, "token": "\u0120Correspond", "merges": "\u0120Cor respond", "count": 1332, "type": "remove by frequency"} -{"id": 48236, "token": "Portuguese", "merges": "Portug uese", "count": 1332, "type": "remove by frequency"} -{"id": 32592, "token": "\u0120Heavy", "merges": "\u0120He avy", "count": 1332, "type": "remove by frequency"} -{"id": 48936, "token": "\u0120=&", "merges": "\u0120= &", "count": 1332, "type": "remove by frequency"} -{"id": 41895, "token": "\u0120Pd", "merges": "\u0120P d", "count": 1332, "type": "remove by frequency"} -{"id": 43713, "token": "\u0120wonderfully", "merges": "\u0120wonder fully", "count": 1333, "type": "remove by frequency"} -{"id": 49963, "token": "\u0120clinging", "merges": "\u0120cl inging", "count": 1333, "type": "remove by frequency"} -{"id": 46220, "token": "oliberal", "merges": "ol iberal", "count": 1333, "type": "remove by frequency"} -{"id": 48994, "token": "\u0120massively", "merges": "\u0120mass ively", "count": 1334, "type": "remove by frequency"} -{"id": 4207, "token": "\\].", "merges": "\\] .", "count": 1334, "type": "remove by frequency"} -{"id": 43431, "token": "\u0120inhalation", "merges": "\u0120inhal ation", "count": 1335, "type": "remove by frequency"} -{"id": 39914, "token": "\u0120Doll", "merges": "\u0120D oll", "count": 1335, "type": "remove by frequency"} -{"id": 43312, "token": "\u0120Beginning", "merges": "\u0120Begin ning", "count": 1336, "type": "remove by frequency"} -{"id": 48490, "token": "\u0120sprayed", "merges": "\u0120spray ed", "count": 1336, "type": "remove by frequency"} -{"id": 27166, "token": "\u0120anatomical", "merges": "\u0120anatom ical", "count": 1337, "type": "remove by frequency"} -{"id": 34945, "token": "\u0120misdem", "merges": "\u0120mis dem", "count": 1337, "type": "remove by frequency"} -{"id": 45546, "token": "\u0120Danger", "merges": "\u0120D anger", "count": 1337, "type": "remove by frequency"} -{"id": 40992, "token": "\u0120cardio", "merges": "\u0120card io", "count": 1337, "type": "remove by frequency"} -{"id": 27338, "token": "\u0120parasites", "merges": "\u0120paras ites", "count": 1338, "type": "remove by frequency"} -{"id": 47830, "token": "\u0120frying", "merges": "\u0120f rying", "count": 1338, "type": "remove by frequency"} -{"id": 48229, "token": "rifying", "merges": "r ifying", "count": 1338, "type": "remove by frequency"} -{"id": 24035, "token": "\u0120Keith", "merges": "\u0120Ke ith", "count": 1338, "type": "remove by frequency"} -{"id": 29480, "token": "\u0120Simpson", "merges": "\u0120Sim pson", "count": 1339, "type": "remove by frequency"} -{"id": 21587, "token": "\u0120Juan", "merges": "\u0120J uan", "count": 1339, "type": "remove by frequency"} -{"id": 21193, "token": "\u0120Singh", "merges": "\u0120Sing h", "count": 1340, "type": "remove by frequency"} -{"id": 34396, "token": "\u0120Ran", "merges": "\u0120R an", "count": 1340, "type": "remove by frequency"} -{"id": 25311, "token": "\u0120ICU", "merges": "\u0120IC U", "count": 1340, "type": "remove by frequency"} -{"id": 47812, "token": "\u0120\u00d1\u0138", "merges": "\u0120\u00d1 \u0138", "count": 1340, "type": "remove by frequency"} -{"id": 14690, "token": "\u0120Matthe", "merges": "\u0120Mat the", "count": 1341, "type": "remove by frequency"} -{"id": 10657, "token": "\u0120Pak", "merges": "\u0120P ak", "count": 1341, "type": "remove by frequency"} -{"id": 21929, "token": "ozygous", "merges": "ozyg ous", "count": 1342, "type": "remove by frequency"} -{"id": 40402, "token": "\u0120Fight", "merges": "\u0120F ight", "count": 1342, "type": "remove by frequency"} -{"id": 30483, "token": "\u0120Haus", "merges": "\u0120H aus", "count": 1342, "type": "remove by frequency"} -{"id": 44351, "token": "\u0120continu", "merges": "\u0120contin u", "count": 1343, "type": "remove by frequency"} -{"id": 23142, "token": "\u0120Soph", "merges": "\u0120S oph", "count": 1343, "type": "remove by frequency"} -{"id": 31643, "token": "\u0120EXPRESS", "merges": "\u0120EX PRESS", "count": 1344, "type": "remove by frequency"} -{"id": 38224, "token": "\u0120Stress", "merges": "\u0120St ress", "count": 1344, "type": "remove by frequency"} -{"id": 15526, "token": "\u0120Chair", "merges": "\u0120Ch air", "count": 1344, "type": "remove by frequency"} -{"id": 16155, "token": "\u0120Executive", "merges": "\u0120Exec utive", "count": 1345, "type": "remove by frequency"} -{"id": 43341, "token": "\u0120mimics", "merges": "\u0120mim ics", "count": 1345, "type": "remove by frequency"} -{"id": 23024, "token": "\u0120airway", "merges": "\u0120air way", "count": 1345, "type": "remove by frequency"} -{"id": 11819, "token": "\u0120Steve", "merges": "\u0120Ste ve", "count": 1345, "type": "remove by frequency"} -{"id": 39688, "token": "\u0120Epic", "merges": "\u0120Ep ic", "count": 1345, "type": "remove by frequency"} -{"id": 49681, "token": "\u0120commuting", "merges": "\u0120comm uting", "count": 1346, "type": "remove by frequency"} -{"id": 35957, "token": "\u0120pictured", "merges": "\u0120pict ured", "count": 1346, "type": "remove by frequency"} -{"id": 37739, "token": "roidism", "merges": "roid ism", "count": 1346, "type": "remove by frequency"} -{"id": 16494, "token": "\u0120Debt", "merges": "\u0120De bt", "count": 1346, "type": "remove by frequency"} -{"id": 31933, "token": "\u0120Rush", "merges": "\u0120R ush", "count": 1346, "type": "remove by frequency"} -{"id": 40897, "token": "\u0120Dup", "merges": "\u0120D up", "count": 1346, "type": "remove by frequency"} -{"id": 41597, "token": "uitively", "merges": "uit ively", "count": 1347, "type": "remove by frequency"} -{"id": 12694, "token": "\u0120Elizabeth", "merges": "\u0120El izabeth", "count": 1348, "type": "remove by frequency"} -{"id": 16991, "token": "\u0120electroph", "merges": "\u0120elect roph", "count": 1349, "type": "remove by frequency"} -{"id": 15657, "token": "\u0120Ron", "merges": "\u0120R on", "count": 1349, "type": "remove by frequency"} -{"id": 22864, "token": "\u0120Fra", "merges": "\u0120F ra", "count": 1349, "type": "remove by frequency"} -{"id": 42390, "token": "\u0120startling", "merges": "\u0120start ling", "count": 1350, "type": "remove by frequency"} -{"id": 20368, "token": "\u0120intraven", "merges": "\u0120intra ven", "count": 1350, "type": "remove by frequency"} -{"id": 42250, "token": "\u0120novelist", "merges": "\u0120novel ist", "count": 1350, "type": "remove by frequency"} -{"id": 18096, "token": "\u0120Pho", "merges": "\u0120P ho", "count": 1350, "type": "remove by frequency"} -{"id": 46231, "token": "\u0120GV", "merges": "\u0120G V", "count": 1350, "type": "remove by frequency"} -{"id": 49273, "token": "\u0120unmistak", "merges": "\u0120unmist ak", "count": 1351, "type": "remove by frequency"} -{"id": 18629, "token": "\u0120catheter", "merges": "\u0120cathe ter", "count": 1351, "type": "remove by frequency"} -{"id": 17508, "token": "\u0120hepatic", "merges": "\u0120hep atic", "count": 1351, "type": "remove by frequency"} -{"id": 12961, "token": "\u0120Budd", "merges": "\u0120B udd", "count": 1351, "type": "remove by frequency"} -{"id": 9000, "token": "\u0120apopt", "merges": "\u0120ap opt", "count": 1352, "type": "remove by frequency"} -{"id": 29869, "token": "\u0120Sak", "merges": "\u0120S ak", "count": 1352, "type": "remove by frequency"} -{"id": 41102, "token": "\u00e3\u0123\u0139\u00e3\u0123\u00a6\u00e3\u0123\u0126\u00e3\u0124\u012d", "merges": "\u00e3\u0123\u0139 \u00e3\u0123\u00a6\u00e3\u0123\u0126\u00e3\u0124\u012d", "count": 1353, "type": "remove by frequency"} -{"id": 29573, "token": "\u0120Television", "merges": "\u0120Tele vision", "count": 1353, "type": "remove by frequency"} -{"id": 35527, "token": "\u0120endocrine", "merges": "\u0120end ocrine", "count": 1353, "type": "remove by frequency"} -{"id": 39564, "token": "a\u00c3\u00b1", "merges": "a \u00c3\u00b1", "count": 1354, "type": "remove by frequency"} -{"id": 38759, "token": "\u0120----------------------------", "merges": "\u0120---------------- ------------", "count": 1355, "type": "remove by frequency"} -{"id": 30807, "token": "\u0120muttered", "merges": "\u0120mut tered", "count": 1355, "type": "remove by frequency"} -{"id": 44507, "token": "\u0120mound", "merges": "\u0120m ound", "count": 1355, "type": "remove by frequency"} -{"id": 25548, "token": "\u0120cartilage", "merges": "\u0120cart ilage", "count": 1356, "type": "remove by frequency"} -{"id": 48089, "token": "\u0120raison", "merges": "\u0120ra ison", "count": 1356, "type": "remove by frequency"} -{"id": 32923, "token": "\u0120Pract", "merges": "\u0120P ract", "count": 1356, "type": "remove by frequency"} -{"id": 41759, "token": "\u0120comeback", "merges": "\u0120come back", "count": 1357, "type": "remove by frequency"} -{"id": 40233, "token": "\u0120Lambert", "merges": "\u0120Lam bert", "count": 1357, "type": "remove by frequency"} -{"id": 41751, "token": "\u0120d\u00c3\u00a9j\u00c3\u0142", "merges": "\u0120d\u00c3\u00a9 j\u00c3\u0142", "count": 1357, "type": "remove by frequency"} -{"id": 23814, "token": "\u00c4\u0139", "merges": "\u00c4 \u0139", "count": 1357, "type": "remove by frequency"} -{"id": 41632, "token": "\u0120Formation", "merges": "\u0120Form ation", "count": 1358, "type": "remove by frequency"} -{"id": 31916, "token": "\u0120Titan", "merges": "\u0120T itan", "count": 1358, "type": "remove by frequency"} -{"id": 40505, "token": "\u0120WANT", "merges": "\u0120W ANT", "count": 1358, "type": "remove by frequency"} -{"id": 19811, "token": "\u0120Ald", "merges": "\u0120A ld", "count": 1358, "type": "remove by frequency"} -{"id": 34511, "token": "\u0120Din", "merges": "\u0120D in", "count": 1358, "type": "remove by frequency"} -{"id": 24819, "token": "\u0120Tow", "merges": "\u0120T ow", "count": 1358, "type": "remove by frequency"} -{"id": 27339, "token": "\u00d7\u0133", "merges": "\u00d7 \u0133", "count": 1358, "type": "remove by frequency"} -{"id": 29493, "token": "\u0120nonprofit", "merges": "\u0120non profit", "count": 1359, "type": "remove by frequency"} -{"id": 27873, "token": "\u0120Benef", "merges": "\u0120Ben ef", "count": 1359, "type": "remove by frequency"} -{"id": 27409, "token": "\u0120Cot", "merges": "\u0120C ot", "count": 1359, "type": "remove by frequency"} -{"id": 46820, "token": "\u0120Coh", "merges": "\u0120C oh", "count": 1359, "type": "remove by frequency"} -{"id": 26915, "token": "\u0120polymerization", "merges": "\u0120polymer ization", "count": 1360, "type": "remove by frequency"} -{"id": 37148, "token": "inguishable", "merges": "inguish able", "count": 1360, "type": "remove by frequency"} -{"id": 35330, "token": "\u0120protons", "merges": "\u0120prot ons", "count": 1360, "type": "remove by frequency"} -{"id": 30205, "token": "othal", "merges": "oth al", "count": 1360, "type": "remove by frequency"} -{"id": 42134, "token": "\u0120Concert", "merges": "\u0120Con cert", "count": 1361, "type": "remove by frequency"} -{"id": 42227, "token": "\u0120quadrup", "merges": "\u0120quadr up", "count": 1361, "type": "remove by frequency"} -{"id": 46862, "token": "\u0120BBB", "merges": "\u0120B BB", "count": 1361, "type": "remove by frequency"} -{"id": 45680, "token": "\u0120ihm", "merges": "\u0120i hm", "count": 1361, "type": "remove by frequency"} -{"id": 36873, "token": "\u0120Chemistry", "merges": "\u0120Chem istry", "count": 1362, "type": "remove by frequency"} -{"id": 22056, "token": "\u0120ligands", "merges": "\u0120lig ands", "count": 1362, "type": "remove by frequency"} -{"id": 29732, "token": "\u0120Koh", "merges": "\u0120K oh", "count": 1362, "type": "remove by frequency"} -{"id": 23150, "token": "\u0120acetate", "merges": "\u0120acet ate", "count": 1363, "type": "remove by frequency"} -{"id": 47011, "token": "\u0120Artists", "merges": "\u0120Art ists", "count": 1363, "type": "remove by frequency"} -{"id": 16688, "token": "\u0120Being", "merges": "\u0120Be ing", "count": 1363, "type": "remove by frequency"} -{"id": 38151, "token": "jours", "merges": "j ours", "count": 1363, "type": "remove by frequency"} -{"id": 41666, "token": "\u0120\u00d0\u00bd\u00d0\u00be", "merges": "\u0120\u00d0\u00bd \u00d0\u00be", "count": 1363, "type": "remove by frequency"} -{"id": 26790, "token": "idyl", "merges": "id yl", "count": 1363, "type": "remove by frequency"} -{"id": 29218, "token": "\u0120Airlines", "merges": "\u0120Airl ines", "count": 1364, "type": "remove by frequency"} -{"id": 41203, "token": "\u0120seeming", "merges": "\u0120seem ing", "count": 1364, "type": "remove by frequency"} -{"id": 22551, "token": "\u0120Apr", "merges": "\u0120A pr", "count": 1364, "type": "remove by frequency"} -{"id": 38800, "token": "\u0120cowork", "merges": "\u0120cow ork", "count": 1365, "type": "remove by frequency"} -{"id": 39952, "token": "\u0120Rise", "merges": "\u0120R ise", "count": 1365, "type": "remove by frequency"} -{"id": 43023, "token": "acyl", "merges": "ac yl", "count": 1365, "type": "remove by frequency"} -{"id": 37562, "token": "\u0120obsess", "merges": "\u0120obs ess", "count": 1366, "type": "remove by frequency"} -{"id": 41837, "token": "\u0120pesso", "merges": "\u0120pess o", "count": 1366, "type": "remove by frequency"} -{"id": 50187, "token": "\u0120Semi", "merges": "\u0120Sem i", "count": 1366, "type": "remove by frequency"} -{"id": 48722, "token": "\u0120choked", "merges": "\u0120ch oked", "count": 1367, "type": "remove by frequency"} -{"id": 42907, "token": "iazep", "merges": "iaz ep", "count": 1367, "type": "remove by frequency"} -{"id": 39292, "token": "\u0120osteoporosis", "merges": "\u0120osteopor osis", "count": 1368, "type": "remove by frequency"} -{"id": 49685, "token": "\u0120Purple", "merges": "\u0120Pur ple", "count": 1368, "type": "remove by frequency"} -{"id": 39775, "token": "\u0120jquery", "merges": "\u0120j query", "count": 1368, "type": "remove by frequency"} -{"id": 31965, "token": "\u0120Sections", "merges": "\u0120Se ctions", "count": 1369, "type": "remove by frequency"} -{"id": 40479, "token": "\u0120pathetic", "merges": "\u0120pat hetic", "count": 1370, "type": "remove by frequency"} -{"id": 43565, "token": "\u0120LinkedIn", "merges": "\u0120Linked In", "count": 1370, "type": "remove by frequency"} -{"id": 40901, "token": "enging", "merges": "eng ing", "count": 1370, "type": "remove by frequency"} -{"id": 30751, "token": "\u0120mundo", "merges": "\u0120m undo", "count": 1370, "type": "remove by frequency"} -{"id": 41868, "token": "\u0120sensations", "merges": "\u0120sens ations", "count": 1371, "type": "remove by frequency"} -{"id": 31990, "token": "\u0120Kitchen", "merges": "\u0120Kit chen", "count": 1371, "type": "remove by frequency"} -{"id": 27876, "token": "\u0120Tesla", "merges": "\u0120Tes la", "count": 1371, "type": "remove by frequency"} -{"id": 34636, "token": "\u0120Kle", "merges": "\u0120K le", "count": 1371, "type": "remove by frequency"} -{"id": 42455, "token": "\u0120contrasting", "merges": "\u0120contrast ing", "count": 1372, "type": "remove by frequency"} -{"id": 15022, "token": "\u0120Clinical", "merges": "\u0120Cl inical", "count": 1373, "type": "remove by frequency"} -{"id": 9651, "token": "\u0120Police", "merges": "\u0120Pol ice", "count": 1373, "type": "remove by frequency"} -{"id": 47708, "token": "\u0120Stick", "merges": "\u0120St ick", "count": 1373, "type": "remove by frequency"} -{"id": 14861, "token": "\u0120Tony", "merges": "\u0120T ony", "count": 1373, "type": "remove by frequency"} -{"id": 44211, "token": "\u0120Horizon", "merges": "\u0120Hor izon", "count": 1374, "type": "remove by frequency"} -{"id": 33618, "token": "\u0120tendon", "merges": "\u0120tend on", "count": 1374, "type": "remove by frequency"} -{"id": 48607, "token": "ninger", "merges": "ning er", "count": 1375, "type": "remove by frequency"} -{"id": 37764, "token": "\u0120vort", "merges": "\u0120v ort", "count": 1375, "type": "remove by frequency"} -{"id": 30821, "token": "\u0120speculate", "merges": "\u0120spec ulate", "count": 1376, "type": "remove by frequency"} -{"id": 19027, "token": "\u0120Nelson", "merges": "\u0120N elson", "count": 1376, "type": "remove by frequency"} -{"id": 35610, "token": "\u0120plated", "merges": "\u0120pl ated", "count": 1376, "type": "remove by frequency"} -{"id": 38594, "token": "\u0120parap", "merges": "\u0120par ap", "count": 1376, "type": "remove by frequency"} -{"id": 18404, "token": "\u0120pancreatic", "merges": "\u0120pancreat ic", "count": 1377, "type": "remove by frequency"} -{"id": 46165, "token": "\u0120Videos", "merges": "\u0120V ideos", "count": 1377, "type": "remove by frequency"} -{"id": 48858, "token": "\u0120diagnosing", "merges": "\u0120diagn osing", "count": 1379, "type": "remove by frequency"} -{"id": 32312, "token": "\u0120Initially", "merges": "\u0120Init ially", "count": 1379, "type": "remove by frequency"} -{"id": 42774, "token": "ocyst", "merges": "ocy st", "count": 1379, "type": "remove by frequency"} -{"id": 45904, "token": "\u0120hypnot", "merges": "\u0120hyp not", "count": 1380, "type": "remove by frequency"} -{"id": 28857, "token": "ectomy", "merges": "ect omy", "count": 1380, "type": "remove by frequency"} -{"id": 42413, "token": "\u0120Tess", "merges": "\u0120T ess", "count": 1380, "type": "remove by frequency"} -{"id": 17476, "token": "\u00c3\u00aame", "merges": "\u00c3\u00aa me", "count": 1380, "type": "remove by frequency"} -{"id": 35863, "token": "\u0120FAQ", "merges": "\u0120FA Q", "count": 1380, "type": "remove by frequency"} -{"id": 16124, "token": "\u00c5\u0133", "merges": "\u00c5 \u0133", "count": 1380, "type": "remove by frequency"} -{"id": 30501, "token": "\u0120undisputed", "merges": "\u0120undis puted", "count": 1381, "type": "remove by frequency"} -{"id": 16598, "token": "\u0120Kennedy", "merges": "\u0120Kenn edy", "count": 1381, "type": "remove by frequency"} -{"id": 38578, "token": "ylase", "merges": "yl ase", "count": 1381, "type": "remove by frequency"} -{"id": 6691, "token": "\u0120Mex", "merges": "\u0120M ex", "count": 1381, "type": "remove by frequency"} -{"id": 48012, "token": "\u0120Chu", "merges": "\u0120Ch u", "count": 1381, "type": "remove by frequency"} -{"id": 47980, "token": "\u0120BX", "merges": "\u0120B X", "count": 1381, "type": "remove by frequency"} -{"id": 41292, "token": "\u0120connective", "merges": "\u0120connect ive", "count": 1382, "type": "remove by frequency"} -{"id": 43150, "token": "\u0120perin", "merges": "\u0120per in", "count": 1382, "type": "remove by frequency"} -{"id": 45961, "token": "\u0120Marathon", "merges": "\u0120Mar athon", "count": 1383, "type": "remove by frequency"} -{"id": 23984, "token": "ocytosis", "merges": "ocyt osis", "count": 1383, "type": "remove by frequency"} -{"id": 36215, "token": "\u0120Foods", "merges": "\u0120Food s", "count": 1383, "type": "remove by frequency"} -{"id": 40672, "token": "\u0120bland", "merges": "\u0120bl and", "count": 1383, "type": "remove by frequency"} -{"id": 48579, "token": "OMEM", "merges": "OM EM", "count": 1383, "type": "remove by frequency"} -{"id": 16622, "token": "\\#", "merges": "\\ #", "count": 1383, "type": "remove by frequency"} -{"id": 31856, "token": "olymer", "merges": "oly mer", "count": 1384, "type": "remove by frequency"} -{"id": 48546, "token": "\u0120v\u00c3\u0143", "merges": "\u0120v \u00c3\u0143", "count": 1384, "type": "remove by frequency"} -{"id": 28972, "token": "\u0120steroid", "merges": "\u0120ster oid", "count": 1385, "type": "remove by frequency"} -{"id": 25784, "token": "\u0120tubular", "merges": "\u0120tub ular", "count": 1385, "type": "remove by frequency"} -{"id": 15797, "token": "\u0120Upon", "merges": "\u0120Up on", "count": 1385, "type": "remove by frequency"} -{"id": 12578, "token": "\u0120FBI", "merges": "\u0120F BI", "count": 1385, "type": "remove by frequency"} -{"id": 49664, "token": "\u0120instructive", "merges": "\u0120instruct ive", "count": 1387, "type": "remove by frequency"} -{"id": 46402, "token": "\u0120puck", "merges": "\u0120p uck", "count": 1387, "type": "remove by frequency"} -{"id": 38691, "token": "\u0120impairments", "merges": "\u0120impair ments", "count": 1388, "type": "remove by frequency"} -{"id": 19244, "token": "\u0120Instit", "merges": "\u0120In stit", "count": 1388, "type": "remove by frequency"} -{"id": 12051, "token": "\u0120Wis", "merges": "\u0120W is", "count": 1388, "type": "remove by frequency"} -{"id": 47300, "token": "\u0120surfaced", "merges": "\u0120surf aced", "count": 1389, "type": "remove by frequency"} -{"id": 47006, "token": "\u0120emergent", "merges": "\u0120emerg ent", "count": 1389, "type": "remove by frequency"} -{"id": 8823, "token": "bsy", "merges": "bs y", "count": 1389, "type": "remove by frequency"} -{"id": 42876, "token": "\u0120marginally", "merges": "\u0120marg inally", "count": 1390, "type": "remove by frequency"} -{"id": 48409, "token": "\u0120pudding", "merges": "\u0120pud ding", "count": 1390, "type": "remove by frequency"} -{"id": 32716, "token": "ch\u00c3\u00a9", "merges": "ch \u00c3\u00a9", "count": 1390, "type": "remove by frequency"} -{"id": 45763, "token": "\u0120Hert", "merges": "\u0120H ert", "count": 1391, "type": "remove by frequency"} -{"id": 16393, "token": "\u0120Nev", "merges": "\u0120N ev", "count": 1391, "type": "remove by frequency"} -{"id": 23323, "token": "\u0120Associated", "merges": "\u0120Associ ated", "count": 1392, "type": "remove by frequency"} -{"id": 38264, "token": "\u0120Adventure", "merges": "\u0120Advent ure", "count": 1392, "type": "remove by frequency"} -{"id": 39764, "token": "\u0120aspirin", "merges": "\u0120aspir in", "count": 1392, "type": "remove by frequency"} -{"id": 21703, "token": "\u0120agar", "merges": "\u0120ag ar", "count": 1392, "type": "remove by frequency"} -{"id": 38731, "token": "\u0120Spar", "merges": "\u0120S par", "count": 1392, "type": "remove by frequency"} -{"id": 15472, "token": "\u00e0\u00a5\u0129", "merges": "\u00e0\u00a5 \u0129", "count": 1392, "type": "remove by frequency"} -{"id": 28688, "token": "\u0120eller", "merges": "\u0120 eller", "count": 1393, "type": "remove by frequency"} -{"id": 24309, "token": "\u0120medial", "merges": "\u0120med ial", "count": 1394, "type": "remove by frequency"} -{"id": 28733, "token": "\u00e0\u00a4\u00b8", "merges": "\u00e0\u00a4 \u00b8", "count": 1394, "type": "remove by frequency"} -{"id": 7247, "token": "\u0120Russian", "merges": "\u0120Russ ian", "count": 1395, "type": "remove by frequency"} -{"id": 25627, "token": "\u0120Annual", "merges": "\u0120Ann ual", "count": 1395, "type": "remove by frequency"} -{"id": 45806, "token": "ignant", "merges": "ign ant", "count": 1395, "type": "remove by frequency"} -{"id": 40881, "token": "\u0120fa\u00c3\u00a7", "merges": "\u0120fa \u00c3\u00a7", "count": 1395, "type": "remove by frequency"} -{"id": 35647, "token": "ktiv", "merges": "kt iv", "count": 1395, "type": "remove by frequency"} -{"id": 44813, "token": "pmod", "merges": "p mod", "count": 1395, "type": "remove by frequency"} -{"id": 29738, "token": "\u0120conductance", "merges": "\u0120conduct ance", "count": 1396, "type": "remove by frequency"} -{"id": 27950, "token": "\u0120Horse", "merges": "\u0120H orse", "count": 1397, "type": "remove by frequency"} -{"id": 19536, "token": "\u0120Nort", "merges": "\u0120N ort", "count": 1397, "type": "remove by frequency"} -{"id": 35123, "token": "\u0120mmHg", "merges": "\u0120mm Hg", "count": 1397, "type": "remove by frequency"} -{"id": 49884, "token": ">{{", "merges": "> {{", "count": 1397, "type": "remove by frequency"} -{"id": 10162, "token": "\u0120mitochond", "merges": "\u0120mit ochond", "count": 1398, "type": "remove by frequency"} -{"id": 49199, "token": "\u0120Kepler", "merges": "\u0120Ke pler", "count": 1398, "type": "remove by frequency"} -{"id": 41131, "token": "\u0120Amber", "merges": "\u0120A mber", "count": 1398, "type": "remove by frequency"} -{"id": 11739, "token": "\u0120Yeah", "merges": "\u0120Y eah", "count": 1398, "type": "remove by frequency"} -{"id": 27841, "token": "\u0120negligent", "merges": "\u0120neglig ent", "count": 1399, "type": "remove by frequency"} -{"id": 46280, "token": "\u0120filthy", "merges": "\u0120fil thy", "count": 1399, "type": "remove by frequency"} -{"id": 49927, "token": "\u0120stator", "merges": "\u0120st ator", "count": 1399, "type": "remove by frequency"} -{"id": 36634, "token": "\u0120SOD", "merges": "\u0120S OD", "count": 1399, "type": "remove by frequency"} -{"id": 48869, "token": "\u0120LAP", "merges": "\u0120L AP", "count": 1399, "type": "remove by frequency"} -{"id": 8568, "token": "\u00c3\u0133", "merges": "\u00c3 \u0133", "count": 1399, "type": "remove by frequency"} -{"id": 20146, "token": "\u0120Collins", "merges": "\u0120Coll ins", "count": 1400, "type": "remove by frequency"} -{"id": 48980, "token": "\u0120LINEAR", "merges": "\u0120LINE AR", "count": 1400, "type": "remove by frequency"} -{"id": 46050, "token": "\u0120Fiber", "merges": "\u0120F iber", "count": 1400, "type": "remove by frequency"} -{"id": 23597, "token": "}|\\", "merges": "} |\\", "count": 1400, "type": "remove by frequency"} -{"id": 38129, "token": "IImage", "merges": "II mage", "count": 1401, "type": "remove by frequency"} -{"id": 27334, "token": "\u0120Kil", "merges": "\u0120K il", "count": 1401, "type": "remove by frequency"} -{"id": 42003, "token": "\u0120occult", "merges": "\u0120occ ult", "count": 1402, "type": "remove by frequency"} -{"id": 30070, "token": "\u0120zum", "merges": "\u0120z um", "count": 1402, "type": "remove by frequency"} -{"id": 42277, "token": "--;", "merges": "-- ;", "count": 1402, "type": "remove by frequency"} -{"id": 44517, "token": "\u00e3\u0123\u00a8\u00e3\u0123\u0139\u00e3\u0123\u00a6", "merges": "\u00e3\u0123\u00a8 \u00e3\u0123\u0139\u00e3\u0123\u00a6", "count": 1403, "type": "remove by frequency"} -{"id": 49802, "token": "\u00c3\u00a7as", "merges": "\u00c3\u00a7 as", "count": 1403, "type": "remove by frequency"} -{"id": 49201, "token": "\u00e3\u0124\u00b1", "merges": "\u00e3\u0124 \u00b1", "count": 1403, "type": "remove by frequency"} -{"id": 20967, "token": "\u0120neurological", "merges": "\u0120neurolog ical", "count": 1404, "type": "remove by frequency"} -{"id": 43542, "token": "\u0120Morse", "merges": "\u0120Mor se", "count": 1404, "type": "remove by frequency"} -{"id": 40985, "token": "\u0120Bom", "merges": "\u0120B om", "count": 1404, "type": "remove by frequency"} -{"id": 33083, "token": "\u0120momento", "merges": "\u0120moment o", "count": 1405, "type": "remove by frequency"} -{"id": 46803, "token": "\u0120enamel", "merges": "\u0120en amel", "count": 1405, "type": "remove by frequency"} -{"id": 27264, "token": "\u0120Mario", "merges": "\u0120Mar io", "count": 1405, "type": "remove by frequency"} -{"id": 33614, "token": "\u0120Roc", "merges": "\u0120R oc", "count": 1405, "type": "remove by frequency"} -{"id": 40583, "token": "rendre", "merges": "rend re", "count": 1406, "type": "remove by frequency"} -{"id": 38459, "token": "\u0120Stim", "merges": "\u0120St im", "count": 1406, "type": "remove by frequency"} -{"id": 45639, "token": "\u0120dizz", "merges": "\u0120d izz", "count": 1406, "type": "remove by frequency"} -{"id": 33280, "token": "\u0120HIF", "merges": "\u0120H IF", "count": 1407, "type": "remove by frequency"} -{"id": 24381, "token": "\u00e0\u00a4\u0137", "merges": "\u00e0\u00a4 \u0137", "count": 1407, "type": "remove by frequency"} -{"id": 35896, "token": "\u0120Grande", "merges": "\u0120Grand e", "count": 1408, "type": "remove by frequency"} -{"id": 43551, "token": "\u0120niece", "merges": "\u0120nie ce", "count": 1408, "type": "remove by frequency"} -{"id": 43405, "token": "dominal", "merges": "dom inal", "count": 1409, "type": "remove by frequency"} -{"id": 44400, "token": "\u0120aides", "merges": "\u0120a ides", "count": 1409, "type": "remove by frequency"} -{"id": 32294, "token": "\u00c3\u00a2n", "merges": "\u00c3\u00a2 n", "count": 1409, "type": "remove by frequency"} -{"id": 17331, "token": "ifndef", "merges": "if ndef", "count": 1410, "type": "remove by frequency"} -{"id": 48231, "token": "\u0120NEVER", "merges": "\u0120N EVER", "count": 1410, "type": "remove by frequency"} -{"id": 46660, "token": "\u0120sill", "merges": "\u0120s ill", "count": 1410, "type": "remove by frequency"} -{"id": 34573, "token": "rinos", "merges": "rin os", "count": 1411, "type": "remove by frequency"} -{"id": 29961, "token": "\u0120Saw", "merges": "\u0120S aw", "count": 1411, "type": "remove by frequency"} -{"id": 47864, "token": "\u0120commercials", "merges": "\u0120commercial s", "count": 1412, "type": "remove by frequency"} -{"id": 47696, "token": "\u0120Meat", "merges": "\u0120Me at", "count": 1412, "type": "remove by frequency"} -{"id": 39867, "token": "\u00c5\u0124o", "merges": "\u00c5\u0124 o", "count": 1412, "type": "remove by frequency"} -{"id": 21759, "token": "\u0120detectable", "merges": "\u0120detect able", "count": 1413, "type": "remove by frequency"} -{"id": 46782, "token": "reptococcus", "merges": "rept ococcus", "count": 1413, "type": "remove by frequency"} -{"id": 22043, "token": "\u0120retinal", "merges": "\u0120ret inal", "count": 1413, "type": "remove by frequency"} -{"id": 16611, "token": "\u0120Creek", "merges": "\u0120C reek", "count": 1413, "type": "remove by frequency"} -{"id": 35618, "token": "\u0120osteopor", "merges": "\u0120oste opor", "count": 1414, "type": "remove by frequency"} -{"id": 18682, "token": "\u0120Lawrence", "merges": "\u0120Law rence", "count": 1414, "type": "remove by frequency"} -{"id": 50035, "token": "\u0120Pes", "merges": "\u0120P es", "count": 1414, "type": "remove by frequency"} -{"id": 38536, "token": "\u0120Questionnaire", "merges": "\u0120Question naire", "count": 1415, "type": "remove by frequency"} -{"id": 33427, "token": "\u0120Plaza", "merges": "\u0120Pl aza", "count": 1415, "type": "remove by frequency"} -{"id": 28317, "token": "\u0120Bath", "merges": "\u0120B ath", "count": 1415, "type": "remove by frequency"} -{"id": 50837, "token": "\u00e5\u012d\u00a7", "merges": "\u00e5\u012d \u00a7", "count": 1415, "type": "remove by frequency"} -{"id": 49349, "token": "\u00d0\u00b8\u00d1\u0124\u00d0\u00b5\u00d0\u00bb", "merges": "\u00d0\u00b8\u00d1\u0124 \u00d0\u00b5\u00d0\u00bb", "count": 1416, "type": "remove by frequency"} -{"id": 47382, "token": "ropathy", "merges": "rop athy", "count": 1416, "type": "remove by frequency"} -{"id": 50140, "token": "\u0120Seems", "merges": "\u0120Se ems", "count": 1416, "type": "remove by frequency"} -{"id": 44822, "token": "\u0120unnatural", "merges": "\u0120un natural", "count": 1417, "type": "remove by frequency"} -{"id": 37074, "token": "\u0120saliva", "merges": "\u0120sal iva", "count": 1417, "type": "remove by frequency"} -{"id": 36185, "token": "\u0120s\u00c3\u0143", "merges": "\u0120s \u00c3\u0143", "count": 1417, "type": "remove by frequency"} -{"id": 18571, "token": "\u0120arterial", "merges": "\u0120arter ial", "count": 1418, "type": "remove by frequency"} -{"id": 28913, "token": "\u0120ablation", "merges": "\u0120ab lation", "count": 1418, "type": "remove by frequency"} -{"id": 48736, "token": "\u0120abrog", "merges": "\u0120ab rog", "count": 1418, "type": "remove by frequency"} -{"id": 11531, "token": "\u0120inhibitors", "merges": "\u0120inhib itors", "count": 1419, "type": "remove by frequency"} -{"id": 36255, "token": "\u0120entangled", "merges": "\u0120entang led", "count": 1419, "type": "remove by frequency"} -{"id": 48541, "token": "\u0120plethora", "merges": "\u0120pleth ora", "count": 1419, "type": "remove by frequency"} -{"id": 46189, "token": "\u0120sinister", "merges": "\u0120sin ister", "count": 1419, "type": "remove by frequency"} -{"id": 34373, "token": "utively", "merges": "ut ively", "count": 1419, "type": "remove by frequency"} -{"id": 16638, "token": "\u0120McK", "merges": "\u0120Mc K", "count": 1419, "type": "remove by frequency"} -{"id": 37290, "token": "\u0120annihilation", "merges": "\u0120annih ilation", "count": 1420, "type": "remove by frequency"} -{"id": 49532, "token": "\u0120temperament", "merges": "\u0120temper ament", "count": 1420, "type": "remove by frequency"} -{"id": 38054, "token": "\u0120momenta", "merges": "\u0120moment a", "count": 1420, "type": "remove by frequency"} -{"id": 12733, "token": "\u0120plasm", "merges": "\u0120pl asm", "count": 1420, "type": "remove by frequency"} -{"id": 36526, "token": "\u00c3\u00a1z", "merges": "\u00c3\u00a1 z", "count": 1420, "type": "remove by frequency"} -{"id": 26528, "token": "\u0120spleen", "merges": "\u0120sp leen", "count": 1421, "type": "remove by frequency"} -{"id": 47084, "token": "indicated", "merges": "ind icated", "count": 1422, "type": "remove by frequency"} -{"id": 39524, "token": "\u0120MVP", "merges": "\u0120M VP", "count": 1422, "type": "remove by frequency"} -{"id": 24020, "token": "\u0120regimen", "merges": "\u0120regim en", "count": 1423, "type": "remove by frequency"} -{"id": 41024, "token": "\u0120amused", "merges": "\u0120am used", "count": 1423, "type": "remove by frequency"} -{"id": 28892, "token": "Theorem", "merges": "The orem", "count": 1423, "type": "remove by frequency"} -{"id": 48440, "token": "usamm", "merges": "us amm", "count": 1423, "type": "remove by frequency"} -{"id": 36385, "token": "\u0120palp", "merges": "\u0120pal p", "count": 1423, "type": "remove by frequency"} -{"id": 44323, "token": "\u0120Cake", "merges": "\u0120C ake", "count": 1423, "type": "remove by frequency"} -{"id": 21523, "token": "\u0120Rud", "merges": "\u0120R ud", "count": 1423, "type": "remove by frequency"} -{"id": 16861, "token": "\u0120Singapore", "merges": "\u0120Sing apore", "count": 1424, "type": "remove by frequency"} -{"id": 19019, "token": "\u0120embryos", "merges": "\u0120embry os", "count": 1424, "type": "remove by frequency"} -{"id": 19670, "token": "\u0120Theatre", "merges": "\u0120The atre", "count": 1424, "type": "remove by frequency"} -{"id": 46441, "token": "\u0120foliage", "merges": "\u0120foli age", "count": 1424, "type": "remove by frequency"} -{"id": 32469, "token": "\u0120simmer", "merges": "\u0120sim mer", "count": 1424, "type": "remove by frequency"} -{"id": 15876, "token": "binant", "merges": "bin ant", "count": 1424, "type": "remove by frequency"} -{"id": 41953, "token": "\u0120drm", "merges": "\u0120d rm", "count": 1424, "type": "remove by frequency"} -{"id": 42156, "token": "\u0120natur", "merges": "\u0120nat ur", "count": 1425, "type": "remove by frequency"} -{"id": 44294, "token": "\u0120phenomenal", "merges": "\u0120phenomen al", "count": 1426, "type": "remove by frequency"} -{"id": 26024, "token": "\u0120Molecular", "merges": "\u0120M olecular", "count": 1426, "type": "remove by frequency"} -{"id": 29199, "token": "\u0120vanishing", "merges": "\u0120van ishing", "count": 1426, "type": "remove by frequency"} -{"id": 44029, "token": "arroll", "merges": "ar roll", "count": 1426, "type": "remove by frequency"} -{"id": 49612, "token": "\u0120tame", "merges": "\u0120t ame", "count": 1426, "type": "remove by frequency"} -{"id": 47639, "token": "\u0120unmist", "merges": "\u0120unm ist", "count": 1427, "type": "remove by frequency"} -{"id": 48393, "token": "\u0120skirts", "merges": "\u0120sk irts", "count": 1427, "type": "remove by frequency"} -{"id": 48202, "token": "\u0120gond", "merges": "\u0120g ond", "count": 1427, "type": "remove by frequency"} -{"id": 25212, "token": "\u0120NGC", "merges": "\u0120N GC", "count": 1427, "type": "remove by frequency"} -{"id": 18736, "token": ")[(", "merges": ")[ (", "count": 1428, "type": "remove by frequency"} -{"id": 39067, "token": "\u0120Volunte", "merges": "\u0120Vol unte", "count": 1429, "type": "remove by frequency"} -{"id": 22301, "token": "\u0120Estate", "merges": "\u0120E state", "count": 1429, "type": "remove by frequency"} -{"id": 38652, "token": "propyl", "merges": "prop yl", "count": 1429, "type": "remove by frequency"} -{"id": 46881, "token": "\u0120outfits", "merges": "\u0120out fits", "count": 1430, "type": "remove by frequency"} -{"id": 27772, "token": "\u0120kann", "merges": "\u0120k ann", "count": 1430, "type": "remove by frequency"} -{"id": 31963, "token": "\u0120\u00e2\u013b", "merges": "\u0120\u00e2 \u013b", "count": 1430, "type": "remove by frequency"} -{"id": 50095, "token": "afx", "merges": "af x", "count": 1430, "type": "remove by frequency"} -{"id": 24366, "token": "\u0120conjecture", "merges": "\u0120conject ure", "count": 1431, "type": "remove by frequency"} -{"id": 14798, "token": "\u0120affidav", "merges": "\u0120aff idav", "count": 1431, "type": "remove by frequency"} -{"id": 41185, "token": "\u0120biting", "merges": "\u0120bit ing", "count": 1431, "type": "remove by frequency"} -{"id": 12429, "token": "\u0120Matt", "merges": "\u0120M att", "count": 1431, "type": "remove by frequency"} -{"id": 30687, "token": "\u0120kans", "merges": "\u0120k ans", "count": 1431, "type": "remove by frequency"} -{"id": 48342, "token": "\u0120theolog", "merges": "\u0120the olog", "count": 1432, "type": "remove by frequency"} -{"id": 22455, "token": "\u0120Mason", "merges": "\u0120M ason", "count": 1432, "type": "remove by frequency"} -{"id": 47156, "token": "\u0120muddy", "merges": "\u0120mud dy", "count": 1432, "type": "remove by frequency"} -{"id": 19935, "token": "\u0120aan", "merges": "\u0120a an", "count": 1432, "type": "remove by frequency"} -{"id": 41274, "token": "\u0120bulky", "merges": "\u0120bul ky", "count": 1433, "type": "remove by frequency"} -{"id": 49899, "token": "icarbon", "merges": "icar bon", "count": 1434, "type": "remove by frequency"} -{"id": 48708, "token": "!\");", "merges": "!\" );", "count": 1434, "type": "remove by frequency"} -{"id": 47895, "token": "\u0120authored", "merges": "\u0120auth ored", "count": 1435, "type": "remove by frequency"} -{"id": 19386, "token": "\u0120jQuery", "merges": "\u0120j Query", "count": 1436, "type": "remove by frequency"} -{"id": 47098, "token": "\u0120meats", "merges": "\u0120me ats", "count": 1436, "type": "remove by frequency"} -{"id": 46564, "token": "\u0120dealership", "merges": "\u0120deal ership", "count": 1437, "type": "remove by frequency"} -{"id": 43569, "token": "\u0120tunes", "merges": "\u0120tun es", "count": 1437, "type": "remove by frequency"} -{"id": 24214, "token": "\u0120lept", "merges": "\u0120le pt", "count": 1437, "type": "remove by frequency"} -{"id": 39738, "token": "\u0120psychiatrist", "merges": "\u0120psychiat rist", "count": 1438, "type": "remove by frequency"} -{"id": 49443, "token": "\u0120disdain", "merges": "\u0120dis dain", "count": 1438, "type": "remove by frequency"} -{"id": 33666, "token": "\u0120gluten", "merges": "\u0120gl uten", "count": 1438, "type": "remove by frequency"} -{"id": 41459, "token": "\u0120cages", "merges": "\u0120c ages", "count": 1438, "type": "remove by frequency"} -{"id": 25157, "token": "\u0120Aus", "merges": "\u0120A us", "count": 1439, "type": "remove by frequency"} -{"id": 31077, "token": "\u0120GST", "merges": "\u0120G ST", "count": 1439, "type": "remove by frequency"} -{"id": 47756, "token": "\u0120Dru", "merges": "\u0120D ru", "count": 1439, "type": "remove by frequency"} -{"id": 40518, "token": "\u0120Bd", "merges": "\u0120B d", "count": 1439, "type": "remove by frequency"} -{"id": 30415, "token": "\u0120hemorrhage", "merges": "\u0120hemorrh age", "count": 1440, "type": "remove by frequency"} -{"id": 48842, "token": "\u0120newborns", "merges": "\u0120newborn s", "count": 1440, "type": "remove by frequency"} -{"id": 15435, "token": "\u0120Patrick", "merges": "\u0120Pat rick", "count": 1440, "type": "remove by frequency"} -{"id": 47287, "token": "\u0120strlen", "merges": "\u0120str len", "count": 1440, "type": "remove by frequency"} -{"id": 48221, "token": "rists", "merges": "r ists", "count": 1440, "type": "remove by frequency"} -{"id": 43821, "token": "\u0120axon", "merges": "\u0120ax on", "count": 1440, "type": "remove by frequency"} -{"id": 30973, "token": "\u0120banc", "merges": "\u0120b anc", "count": 1440, "type": "remove by frequency"} -{"id": 40322, "token": "\u0120Nacional", "merges": "\u0120N acional", "count": 1441, "type": "remove by frequency"} -{"id": 33726, "token": "\u0120freshman", "merges": "\u0120fresh man", "count": 1441, "type": "remove by frequency"} -{"id": 36280, "token": "\u0120smelled", "merges": "\u0120sm elled", "count": 1441, "type": "remove by frequency"} -{"id": 15092, "token": "\u0120Within", "merges": "\u0120With in", "count": 1441, "type": "remove by frequency"} -{"id": 44450, "token": "amais", "merges": "ama is", "count": 1441, "type": "remove by frequency"} -{"id": 29937, "token": "\u00d8\u00b7", "merges": "\u00d8 \u00b7", "count": 1441, "type": "remove by frequency"} -{"id": 47987, "token": "\u0120percol", "merges": "\u0120per col", "count": 1442, "type": "remove by frequency"} -{"id": 34689, "token": "\u0120Phen", "merges": "\u0120P hen", "count": 1442, "type": "remove by frequency"} -{"id": 44560, "token": "\u0120Exposure", "merges": "\u0120Ex posure", "count": 1443, "type": "remove by frequency"} -{"id": 34553, "token": "\u0120yelling", "merges": "\u0120y elling", "count": 1443, "type": "remove by frequency"} -{"id": 26864, "token": "\u0120Eagle", "merges": "\u0120E agle", "count": 1444, "type": "remove by frequency"} -{"id": 24138, "token": "\u0120Reyn", "merges": "\u0120Re yn", "count": 1444, "type": "remove by frequency"} -{"id": 28420, "token": "ukary", "merges": "uk ary", "count": 1444, "type": "remove by frequency"} -{"id": 40449, "token": "\u0120disgusting", "merges": "\u0120disgust ing", "count": 1445, "type": "remove by frequency"} -{"id": 34264, "token": "\u0120XI", "merges": "\u0120X I", "count": 1445, "type": "remove by frequency"} -{"id": 46386, "token": "\u0120Hole", "merges": "\u0120H ole", "count": 1446, "type": "remove by frequency"} -{"id": 29730, "token": "\u0120Aur", "merges": "\u0120A ur", "count": 1446, "type": "remove by frequency"} -{"id": 46960, "token": "\u0120fertilization", "merges": "\u0120fert ilization", "count": 1447, "type": "remove by frequency"} -{"id": 32141, "token": "\u00e1\u0125\u0132\u00e1\u0125", "merges": "\u00e1\u0125\u0132 \u00e1\u0125", "count": 1447, "type": "remove by frequency"} -{"id": 42784, "token": "\u0120Tg", "merges": "\u0120T g", "count": 1447, "type": "remove by frequency"} -{"id": 36392, "token": "\u0120photometric", "merges": "\u0120phot ometric", "count": 1448, "type": "remove by frequency"} -{"id": 36568, "token": "\u0120undetect", "merges": "\u0120und etect", "count": 1448, "type": "remove by frequency"} -{"id": 46337, "token": "\u0120betrayal", "merges": "\u0120betray al", "count": 1448, "type": "remove by frequency"} -{"id": 10406, "token": "\u0120Olymp", "merges": "\u0120O lymp", "count": 1448, "type": "remove by frequency"} -{"id": 27280, "token": "\u0120Nak", "merges": "\u0120N ak", "count": 1448, "type": "remove by frequency"} -{"id": 31593, "token": "\u0120Sed", "merges": "\u0120S ed", "count": 1448, "type": "remove by frequency"} -{"id": 50229, "token": "\u0120boarded", "merges": "\u0120board ed", "count": 1449, "type": "remove by frequency"} -{"id": 19285, "token": "\u0120infar", "merges": "\u0120inf ar", "count": 1449, "type": "remove by frequency"} -{"id": 33479, "token": "\u0120lugar", "merges": "\u0120l ugar", "count": 1449, "type": "remove by frequency"} -{"id": 7078, "token": "--------------------------------------------------------------------------------------------------------------------------------", "merges": "---------------------------------------------------------------- ----------------------------------------------------------------", "count": 1450, "type": "remove by frequency"} -{"id": 39420, "token": "\u0120Lap", "merges": "\u0120L ap", "count": 1450, "type": "remove by frequency"} -{"id": 27516, "token": "\u0120Som", "merges": "\u0120S om", "count": 1450, "type": "remove by frequency"} -{"id": 40536, "token": "\u0120antioxidants", "merges": "\u0120antioxid ants", "count": 1451, "type": "remove by frequency"} -{"id": 28281, "token": "\u0120HOW", "merges": "\u0120H OW", "count": 1451, "type": "remove by frequency"} -{"id": 27577, "token": "\u0120modulated", "merges": "\u0120mod ulated", "count": 1452, "type": "remove by frequency"} -{"id": 48450, "token": "\u0120inertial", "merges": "\u0120inert ial", "count": 1452, "type": "remove by frequency"} -{"id": 16835, "token": "\u0120Hung", "merges": "\u0120H ung", "count": 1452, "type": "remove by frequency"} -{"id": 46175, "token": "\u00e0\u00a4\u00a6", "merges": "\u00e0\u00a4 \u00a6", "count": 1452, "type": "remove by frequency"} -{"id": 49203, "token": "\u0120dilute", "merges": "\u0120dil ute", "count": 1453, "type": "remove by frequency"} -{"id": 27659, "token": "\u0120Stuart", "merges": "\u0120St uart", "count": 1453, "type": "remove by frequency"} -{"id": 37568, "token": "\u0120lleg", "merges": "\u0120l leg", "count": 1453, "type": "remove by frequency"} -{"id": 40841, "token": "\u0120lors", "merges": "\u0120l ors", "count": 1453, "type": "remove by frequency"} -{"id": 27737, "token": "\u0120Bog", "merges": "\u0120B og", "count": 1453, "type": "remove by frequency"} -{"id": 21899, "token": "\u0120observational", "merges": "\u0120observ ational", "count": 1454, "type": "remove by frequency"} -{"id": 36416, "token": "uminescence", "merges": "umines cence", "count": 1454, "type": "remove by frequency"} -{"id": 40762, "token": "\u0120inning", "merges": "\u0120in ning", "count": 1454, "type": "remove by frequency"} -{"id": 43246, "token": "\u00d8\u00b3\u00d8\u00aa", "merges": "\u00d8\u00b3 \u00d8\u00aa", "count": 1454, "type": "remove by frequency"} -{"id": 33327, "token": "Supporting", "merges": "Supp orting", "count": 1455, "type": "remove by frequency"} -{"id": 41093, "token": "oarth", "merges": "o arth", "count": 1455, "type": "remove by frequency"} -{"id": 25890, "token": "\u00c3\u00b4t", "merges": "\u00c3\u00b4 t", "count": 1455, "type": "remove by frequency"} -{"id": 34348, "token": "\u0120transcribed", "merges": "\u0120trans cribed", "count": 1456, "type": "remove by frequency"} -{"id": 15194, "token": "\u00c4\u0135", "merges": "\u00c4 \u0135", "count": 1456, "type": "remove by frequency"} -{"id": 20163, "token": "\u0120Campbell", "merges": "\u0120Camp bell", "count": 1457, "type": "remove by frequency"} -{"id": 40688, "token": "\u0120Into", "merges": "\u0120In to", "count": 1457, "type": "remove by frequency"} -{"id": 49314, "token": "\u0120Wan", "merges": "\u0120W an", "count": 1457, "type": "remove by frequency"} -{"id": 26618, "token": "\u0120auditory", "merges": "\u0120aud itory", "count": 1458, "type": "remove by frequency"} -{"id": 15999, "token": "\u0120Unless", "merges": "\u0120Un less", "count": 1458, "type": "remove by frequency"} -{"id": 39669, "token": "\u0120EVERY", "merges": "\u0120EV ERY", "count": 1458, "type": "remove by frequency"} -{"id": 26543, "token": "\u0120Luck", "merges": "\u0120L uck", "count": 1458, "type": "remove by frequency"} -{"id": 42425, "token": "cumin", "merges": "c umin", "count": 1458, "type": "remove by frequency"} -{"id": 14481, "token": "\u0120inhibited", "merges": "\u0120inhib ited", "count": 1459, "type": "remove by frequency"} -{"id": 34349, "token": "\u0120Novel", "merges": "\u0120No vel", "count": 1459, "type": "remove by frequency"} -{"id": 40352, "token": "\u00e3\u0125\u012d", "merges": "\u00e3\u0125 \u012d", "count": 1459, "type": "remove by frequency"} -{"id": 31759, "token": ">()", "merges": "> ()", "count": 1459, "type": "remove by frequency"} -{"id": 47735, "token": "\u00d1\u013a", "merges": "\u00d1 \u013a", "count": 1459, "type": "remove by frequency"} -{"id": 33486, "token": "\u0120infamous", "merges": "\u0120inf amous", "count": 1460, "type": "remove by frequency"} -{"id": 38430, "token": "\u0120Neumann", "merges": "\u0120Ne umann", "count": 1460, "type": "remove by frequency"} -{"id": 16693, "token": "\u0120\u00c5\u0141", "merges": "\u0120\u00c5 \u0141", "count": 1460, "type": "remove by frequency"} -{"id": 45614, "token": "\u0120SiO", "merges": "\u0120Si O", "count": 1461, "type": "remove by frequency"} -{"id": 46404, "token": "empre", "merges": "em pre", "count": 1462, "type": "remove by frequency"} -{"id": 52407, "token": "\u00e6\u0143\u00b4", "merges": "\u00e6\u0143 \u00b4", "count": 1462, "type": "remove by frequency"} -{"id": 50205, "token": "\u0120oppressive", "merges": "\u0120opp ressive", "count": 1463, "type": "remove by frequency"} -{"id": 44297, "token": "\u0120trenches", "merges": "\u0120tren ches", "count": 1463, "type": "remove by frequency"} -{"id": 37735, "token": "\u0120startups", "merges": "\u0120start ups", "count": 1463, "type": "remove by frequency"} -{"id": 38684, "token": "\u0120Stark", "merges": "\u0120St ark", "count": 1463, "type": "remove by frequency"} -{"id": 32877, "token": "\u0120ethylene", "merges": "\u0120 ethylene", "count": 1464, "type": "remove by frequency"} -{"id": 15396, "token": "\u0120mutants", "merges": "\u0120mut ants", "count": 1464, "type": "remove by frequency"} -{"id": 43653, "token": "\u0120taxonomic", "merges": "\u0120tax onomic", "count": 1465, "type": "remove by frequency"} -{"id": 35015, "token": "\u0120Sessions", "merges": "\u0120S essions", "count": 1465, "type": "remove by frequency"} -{"id": 38326, "token": "\u0120pellets", "merges": "\u0120pel lets", "count": 1465, "type": "remove by frequency"} -{"id": 35054, "token": "\u0120memb", "merges": "\u0120mem b", "count": 1465, "type": "remove by frequency"} -{"id": 49244, "token": "\u0120MTV", "merges": "\u0120M TV", "count": 1465, "type": "remove by frequency"} -{"id": 31107, "token": "\u0120Md", "merges": "\u0120M d", "count": 1465, "type": "remove by frequency"} -{"id": 48249, "token": "\u0120fascination", "merges": "\u0120fasc ination", "count": 1466, "type": "remove by frequency"} -{"id": 37292, "token": "\u0120\u00c3\u00b8", "merges": "\u0120\u00c3 \u00b8", "count": 1466, "type": "remove by frequency"} -{"id": 38875, "token": "iasis", "merges": "i asis", "count": 1467, "type": "remove by frequency"} -{"id": 49438, "token": "ulif", "merges": "ul if", "count": 1467, "type": "remove by frequency"} -{"id": 39109, "token": "\u0120ophthal", "merges": "\u0120op hthal", "count": 1468, "type": "remove by frequency"} -{"id": 42007, "token": "itosan", "merges": "itos an", "count": 1468, "type": "remove by frequency"} -{"id": 28509, "token": "\u0120JUST", "merges": "\u0120J UST", "count": 1468, "type": "remove by frequency"} -{"id": 33515, "token": "\u0120Perl", "merges": "\u0120Per l", "count": 1468, "type": "remove by frequency"} -{"id": 26053, "token": "\u0120uit", "merges": "\u0120u it", "count": 1468, "type": "remove by frequency"} -{"id": 34146, "token": "\u0120Lt", "merges": "\u0120L t", "count": 1468, "type": "remove by frequency"} -{"id": 7160, "token": "\u0120Their", "merges": "\u0120The ir", "count": 1469, "type": "remove by frequency"} -{"id": 42765, "token": "\u0120Gri", "merges": "\u0120G ri", "count": 1470, "type": "remove by frequency"} -{"id": 41501, "token": "\u00d1\u0123\u00d1\u012e", "merges": "\u00d1\u0123 \u00d1\u012e", "count": 1470, "type": "remove by frequency"} -{"id": 19705, "token": "\u0120Athe", "merges": "\u0120A the", "count": 1471, "type": "remove by frequency"} -{"id": 28504, "token": "\u0120Stay", "merges": "\u0120St ay", "count": 1471, "type": "remove by frequency"} -{"id": 26678, "token": "^--", "merges": "^ --", "count": 1471, "type": "remove by frequency"} -{"id": 38986, "token": "\u0120SERVICES", "merges": "\u0120SERV ICES", "count": 1472, "type": "remove by frequency"} -{"id": 46957, "token": "\u0120decaying", "merges": "\u0120decay ing", "count": 1472, "type": "remove by frequency"} -{"id": 38375, "token": "ableView", "merges": "able View", "count": 1472, "type": "remove by frequency"} -{"id": 36854, "token": "\u0120polyp", "merges": "\u0120pol yp", "count": 1472, "type": "remove by frequency"} -{"id": 44727, "token": "\u0120Rouge", "merges": "\u0120Rou ge", "count": 1472, "type": "remove by frequency"} -{"id": 39070, "token": "\u0120Eff", "merges": "\u0120E ff", "count": 1472, "type": "remove by frequency"} -{"id": 38992, "token": "\u0120batting", "merges": "\u0120bat ting", "count": 1473, "type": "remove by frequency"} -{"id": 34201, "token": "escence", "merges": "es cence", "count": 1473, "type": "remove by frequency"} -{"id": 20377, "token": "yscall", "merges": "ys call", "count": 1473, "type": "remove by frequency"} -{"id": 39612, "token": "\u0120perceptual", "merges": "\u0120per ceptual", "count": 1474, "type": "remove by frequency"} -{"id": 44395, "token": "\u0120bumps", "merges": "\u0120b umps", "count": 1474, "type": "remove by frequency"} -{"id": 26989, "token": "\u0120Asp", "merges": "\u0120A sp", "count": 1474, "type": "remove by frequency"} -{"id": 26344, "token": "\u00d7\u00a9", "merges": "\u00d7 \u00a9", "count": 1475, "type": "remove by frequency"} -{"id": 40453, "token": "Petition", "merges": "P etition", "count": 1476, "type": "remove by frequency"} -{"id": 40569, "token": "\u0120mailed", "merges": "\u0120m ailed", "count": 1476, "type": "remove by frequency"} -{"id": 24367, "token": "\u0120Sor", "merges": "\u0120S or", "count": 1476, "type": "remove by frequency"} -{"id": 46273, "token": "\u0120STM", "merges": "\u0120ST M", "count": 1476, "type": "remove by frequency"} -{"id": 43147, "token": "v\u00c3\u00a9", "merges": "v \u00c3\u00a9", "count": 1476, "type": "remove by frequency"} -{"id": 10623, "token": "\u00e1\u00bd", "merges": "\u00e1 \u00bd", "count": 1476, "type": "remove by frequency"} -{"id": 27502, "token": "\u0120Hoff", "merges": "\u0120H off", "count": 1477, "type": "remove by frequency"} -{"id": 46952, "token": "\u0120avid", "merges": "\u0120av id", "count": 1477, "type": "remove by frequency"} -{"id": 38433, "token": "\u0120dissoci", "merges": "\u0120diss oci", "count": 1478, "type": "remove by frequency"} -{"id": 18364, "token": "\u0120thromb", "merges": "\u0120throm b", "count": 1478, "type": "remove by frequency"} -{"id": 31802, "token": "\u0120Viv", "merges": "\u0120V iv", "count": 1478, "type": "remove by frequency"} -{"id": 41132, "token": "\u00e0\u00b8\u0137", "merges": "\u00e0\u00b8 \u0137", "count": 1478, "type": "remove by frequency"} -{"id": 22709, "token": "\u0120Rodrig", "merges": "\u0120Rod rig", "count": 1479, "type": "remove by frequency"} -{"id": 28362, "token": "\u0120Gross", "merges": "\u0120G ross", "count": 1479, "type": "remove by frequency"} -{"id": 23972, "token": "\u0120Horn", "merges": "\u0120H orn", "count": 1479, "type": "remove by frequency"} -{"id": 28075, "token": "\u0120ionization", "merges": "\u0120ion ization", "count": 1480, "type": "remove by frequency"} -{"id": 27761, "token": "\u0120Standards", "merges": "\u0120Stand ards", "count": 1480, "type": "remove by frequency"} -{"id": 31165, "token": "\u0120\u00e0\u00ae", "merges": "\u0120 \u00e0\u00ae", "count": 1480, "type": "remove by frequency"} -{"id": 40460, "token": "\u0120corollary", "merges": "\u0120cor ollary", "count": 1481, "type": "remove by frequency"} -{"id": 18593, "token": "\u0120Nap", "merges": "\u0120N ap", "count": 1481, "type": "remove by frequency"} -{"id": 42291, "token": "\u0120Xu", "merges": "\u0120X u", "count": 1481, "type": "remove by frequency"} -{"id": 30265, "token": "\u00d8\u00b5", "merges": "\u00d8 \u00b5", "count": 1481, "type": "remove by frequency"} -{"id": 37357, "token": "\u0120Problems", "merges": "\u0120Pro blems", "count": 1482, "type": "remove by frequency"} -{"id": 44997, "token": "\u0120passer", "merges": "\u0120pass er", "count": 1482, "type": "remove by frequency"} -{"id": 33092, "token": "\u0120halluc", "merges": "\u0120hall uc", "count": 1482, "type": "remove by frequency"} -{"id": 47931, "token": "\u00d8\u00a7\u00d8\u00a8", "merges": "\u00d8\u00a7\u00d8 \u00a8", "count": 1482, "type": "remove by frequency"} -{"id": 40407, "token": "\u0120Kub", "merges": "\u0120K ub", "count": 1482, "type": "remove by frequency"} -{"id": 45821, "token": "\u00d9\u00be", "merges": "\u00d9 \u00be", "count": 1482, "type": "remove by frequency"} -{"id": 32184, "token": "\u0120metabolite", "merges": "\u0120metabol ite", "count": 1483, "type": "remove by frequency"} -{"id": 22801, "token": "\u0120Stevens", "merges": "\u0120Ste vens", "count": 1483, "type": "remove by frequency"} -{"id": 31373, "token": "\u0120Whole", "merges": "\u0120Wh ole", "count": 1483, "type": "remove by frequency"} -{"id": 5786, "token": "otimes", "merges": "ot imes", "count": 1483, "type": "remove by frequency"} -{"id": 21582, "token": "\u0120quas", "merges": "\u0120qu as", "count": 1483, "type": "remove by frequency"} -{"id": 33052, "token": "\u0120keV", "merges": "\u0120ke V", "count": 1483, "type": "remove by frequency"} -{"id": 18323, "token": "\u0120biopsy", "merges": "\u0120bi opsy", "count": 1484, "type": "remove by frequency"} -{"id": 49527, "token": "oughed", "merges": "oug hed", "count": 1484, "type": "remove by frequency"} -{"id": 44723, "token": "\u0120\u00e2\u0128\u0133", "merges": "\u0120\u00e2\u0128 \u0133", "count": 1484, "type": "remove by frequency"} -{"id": 47210, "token": "omyc", "merges": "omy c", "count": 1484, "type": "remove by frequency"} -{"id": 41779, "token": "\u0120FY", "merges": "\u0120F Y", "count": 1484, "type": "remove by frequency"} -{"id": 39074, "token": "\u0120autobi", "merges": "\u0120aut obi", "count": 1485, "type": "remove by frequency"} -{"id": 44108, "token": "\u0120softer", "merges": "\u0120so fter", "count": 1485, "type": "remove by frequency"} -{"id": 37302, "token": "\u0120raced", "merges": "\u0120r aced", "count": 1485, "type": "remove by frequency"} -{"id": 27807, "token": "\u0120intriguing", "merges": "\u0120intrig uing", "count": 1486, "type": "remove by frequency"} -{"id": 23085, "token": "\u0120Andre", "merges": "\u0120And re", "count": 1486, "type": "remove by frequency"} -{"id": 44601, "token": "\u0120blaming", "merges": "\u0120bl aming", "count": 1487, "type": "remove by frequency"} -{"id": 38969, "token": "\u0120passions", "merges": "\u0120pass ions", "count": 1488, "type": "remove by frequency"} -{"id": 10956, "token": "\u0120Father", "merges": "\u0120F ather", "count": 1488, "type": "remove by frequency"} -{"id": 37352, "token": "\u0120polyethylene", "merges": "\u0120poly ethylene", "count": 1489, "type": "remove by frequency"} -{"id": 45288, "token": "\u0120straps", "merges": "\u0120stra ps", "count": 1489, "type": "remove by frequency"} -{"id": 45054, "token": "\u0120foli", "merges": "\u0120fol i", "count": 1489, "type": "remove by frequency"} -{"id": 39306, "token": "\u0120reproducing", "merges": "\u0120reprodu cing", "count": 1490, "type": "remove by frequency"} -{"id": 45800, "token": "\u0120ascribed", "merges": "\u0120as cribed", "count": 1490, "type": "remove by frequency"} -{"id": 35469, "token": "\u0120Outside", "merges": "\u0120Out side", "count": 1490, "type": "remove by frequency"} -{"id": 45567, "token": "strous", "merges": "str ous", "count": 1490, "type": "remove by frequency"} -{"id": 4397, "token": "\u0120September", "merges": "\u0120Sept ember", "count": 1491, "type": "remove by frequency"} -{"id": 26412, "token": "\u0120pleading", "merges": "\u0120ple ading", "count": 1491, "type": "remove by frequency"} -{"id": 16345, "token": "nutrients", "merges": "nut rients", "count": 1491, "type": "remove by frequency"} -{"id": 45974, "token": ".\";", "merges": ".\" ;", "count": 1491, "type": "remove by frequency"} -{"id": 43520, "token": "\u0120resistivity", "merges": "\u0120resist ivity", "count": 1492, "type": "remove by frequency"} -{"id": 38525, "token": "\u0120blogging", "merges": "\u0120blog ging", "count": 1492, "type": "remove by frequency"} -{"id": 15002, "token": "\u0120WITHOUT", "merges": "\u0120WITH OUT", "count": 1492, "type": "remove by frequency"} -{"id": 27117, "token": "\u0120Yam", "merges": "\u0120Y am", "count": 1492, "type": "remove by frequency"} -{"id": 36328, "token": "\u0120Fen", "merges": "\u0120F en", "count": 1492, "type": "remove by frequency"} -{"id": 27508, "token": "\u0120@\"", "merges": "\u0120@ \"", "count": 1492, "type": "remove by frequency"} -{"id": 14489, "token": "\u00e0\u00b2", "merges": "\u00e0 \u00b2", "count": 1492, "type": "remove by frequency"} -{"id": 34527, "token": "geries", "merges": "ger ies", "count": 1493, "type": "remove by frequency"} -{"id": 29292, "token": "\u0120penis", "merges": "\u0120pen is", "count": 1493, "type": "remove by frequency"} -{"id": 21995, "token": "\u0120Sad", "merges": "\u0120S ad", "count": 1493, "type": "remove by frequency"} -{"id": 29876, "token": "\u0120Vic", "merges": "\u0120V ic", "count": 1493, "type": "remove by frequency"} -{"id": 43176, "token": "\u0120gripped", "merges": "\u0120gri pped", "count": 1494, "type": "remove by frequency"} -{"id": 46334, "token": "\u0120toutes", "merges": "\u0120tout es", "count": 1494, "type": "remove by frequency"} -{"id": 31786, "token": "\u0120famil", "merges": "\u0120fam il", "count": 1494, "type": "remove by frequency"} -{"id": 23789, "token": "\u0120Youth", "merges": "\u0120You th", "count": 1494, "type": "remove by frequency"} -{"id": 42731, "token": "ubunt", "merges": "ub unt", "count": 1494, "type": "remove by frequency"} -{"id": 28978, "token": "\u0120diagnoses", "merges": "\u0120diagn oses", "count": 1495, "type": "remove by frequency"} -{"id": 46739, "token": "remia", "merges": "rem ia", "count": 1495, "type": "remove by frequency"} -{"id": 38201, "token": "\u0120Weyl", "merges": "\u0120We yl", "count": 1495, "type": "remove by frequency"} -{"id": 39251, "token": "\u00e0\u00b8\u00b4", "merges": "\u00e0\u00b8 \u00b4", "count": 1495, "type": "remove by frequency"} -{"id": 34593, "token": "\u0120persuasive", "merges": "\u0120persu asive", "count": 1496, "type": "remove by frequency"} -{"id": 29655, "token": "\u0120Mull", "merges": "\u0120M ull", "count": 1496, "type": "remove by frequency"} -{"id": 25852, "token": "\u0120Lamb", "merges": "\u0120L amb", "count": 1496, "type": "remove by frequency"} -{"id": 31640, "token": "\u0120robustness", "merges": "\u0120robust ness", "count": 1497, "type": "remove by frequency"} -{"id": 48256, "token": "crystalline", "merges": "crystall ine", "count": 1497, "type": "remove by frequency"} -{"id": 24402, "token": "\u0120carot", "merges": "\u0120car ot", "count": 1497, "type": "remove by frequency"} -{"id": 43587, "token": "\u0120mejor", "merges": "\u0120me jor", "count": 1497, "type": "remove by frequency"} -{"id": 47304, "token": "\u0120Collections", "merges": "\u0120Col lections", "count": 1498, "type": "remove by frequency"} -{"id": 50217, "token": "\u0120relegated", "merges": "\u0120rele gated", "count": 1498, "type": "remove by frequency"} -{"id": 30429, "token": "\u0120cellulose", "merges": "\u0120cell ulose", "count": 1498, "type": "remove by frequency"} -{"id": 47466, "token": "\u0120hinted", "merges": "\u0120hint ed", "count": 1498, "type": "remove by frequency"} -{"id": 43985, "token": "\u0120pleth", "merges": "\u0120ple th", "count": 1498, "type": "remove by frequency"} -{"id": 11486, "token": "ARRANT", "merges": "ARR ANT", "count": 1498, "type": "remove by frequency"} -{"id": 45844, "token": "\u0120vara", "merges": "\u0120var a", "count": 1498, "type": "remove by frequency"} -{"id": 35624, "token": "\u0120Resistance", "merges": "\u0120Res istance", "count": 1500, "type": "remove by frequency"} -{"id": 25666, "token": "\u0120LIMITED", "merges": "\u0120LIM ITED", "count": 1500, "type": "remove by frequency"} -{"id": 48445, "token": "\u0120frown", "merges": "\u0120f rown", "count": 1500, "type": "remove by frequency"} -{"id": 40924, "token": "\u0120exem", "merges": "\u0120ex em", "count": 1500, "type": "remove by frequency"} -{"id": 25571, "token": "\u0120Riemann", "merges": "\u0120R iemann", "count": 1501, "type": "remove by frequency"} -{"id": 31152, "token": "\u0120subpo", "merges": "\u0120sub po", "count": 1501, "type": "remove by frequency"} -{"id": 46062, "token": "\u0120Means", "merges": "\u0120Me ans", "count": 1501, "type": "remove by frequency"} -{"id": 25211, "token": "\u0120bowel", "merges": "\u0120bow el", "count": 1501, "type": "remove by frequency"} -{"id": 46185, "token": "\u0120Funds", "merges": "\u0120Fun ds", "count": 1502, "type": "remove by frequency"} -{"id": 49335, "token": "\u0120manic", "merges": "\u0120man ic", "count": 1503, "type": "remove by frequency"} -{"id": 45828, "token": "\u0120Nil", "merges": "\u0120N il", "count": 1503, "type": "remove by frequency"} -{"id": 38095, "token": "\u00c3\u00b6l", "merges": "\u00c3\u00b6 l", "count": 1503, "type": "remove by frequency"} -{"id": 41236, "token": "\u0120whiskey", "merges": "\u0120whis key", "count": 1504, "type": "remove by frequency"} -{"id": 10035, "token": "\u0120Mike", "merges": "\u0120M ike", "count": 1504, "type": "remove by frequency"} -{"id": 48232, "token": "\u0120frig", "merges": "\u0120fr ig", "count": 1504, "type": "remove by frequency"} -{"id": 47517, "token": "\u0120depressing", "merges": "\u0120dep ressing", "count": 1505, "type": "remove by frequency"} -{"id": 38981, "token": "\u0120parabolic", "merges": "\u0120par abolic", "count": 1505, "type": "remove by frequency"} -{"id": 44907, "token": "\u0120Gaming", "merges": "\u0120G aming", "count": 1505, "type": "remove by frequency"} -{"id": 39929, "token": "\u00c3\u00a9t\u00c3\u00a9", "merges": "\u00c3\u00a9t \u00c3\u00a9", "count": 1505, "type": "remove by frequency"} -{"id": 38338, "token": "Supplemental", "merges": "Supp lemental", "count": 1506, "type": "remove by frequency"} -{"id": 43235, "token": "\u0120seasoned", "merges": "\u0120season ed", "count": 1506, "type": "remove by frequency"} -{"id": 16773, "token": "\u0120rape", "merges": "\u0120ra pe", "count": 1506, "type": "remove by frequency"} -{"id": 49167, "token": "\u0120blinding", "merges": "\u0120bl inding", "count": 1507, "type": "remove by frequency"} -{"id": 31575, "token": "\u0120Champion", "merges": "\u0120Champ ion", "count": 1507, "type": "remove by frequency"} -{"id": 36188, "token": "\u0120qu\u00c3\u00a9", "merges": "\u0120qu \u00c3\u00a9", "count": 1507, "type": "remove by frequency"} -{"id": 22607, "token": "\u0120Wor", "merges": "\u0120W or", "count": 1507, "type": "remove by frequency"} -{"id": 47297, "token": "\u0120RAS", "merges": "\u0120R AS", "count": 1507, "type": "remove by frequency"} -{"id": 13295, "token": "\u0120Conserv", "merges": "\u0120Cons erv", "count": 1508, "type": "remove by frequency"} -{"id": 39214, "token": "\u0120punched", "merges": "\u0120pun ched", "count": 1508, "type": "remove by frequency"} -{"id": 47683, "token": "\u0120famed", "merges": "\u0120fam ed", "count": 1508, "type": "remove by frequency"} -{"id": 31100, "token": "\u0120CBD", "merges": "\u0120C BD", "count": 1508, "type": "remove by frequency"} -{"id": 42588, "token": "\u0120methamphetamine", "merges": "\u0120meth amphetamine", "count": 1509, "type": "remove by frequency"} -{"id": 45115, "token": "\u0120dehydration", "merges": "\u0120de hydration", "count": 1509, "type": "remove by frequency"} -{"id": 38182, "token": "\u0120Factors", "merges": "\u0120Fact ors", "count": 1509, "type": "remove by frequency"} -{"id": 30729, "token": "\u00e1\u0125\u013a", "merges": "\u00e1\u0125 \u013a", "count": 1509, "type": "remove by frequency"} -{"id": 16446, "token": "$;", "merges": "$ ;", "count": 1509, "type": "remove by frequency"} -{"id": 23212, "token": "\u0120Metro", "merges": "\u0120Met ro", "count": 1510, "type": "remove by frequency"} -{"id": 46324, "token": "\u0120scoop", "merges": "\u0120sc oop", "count": 1510, "type": "remove by frequency"} -{"id": 37836, "token": "\u0120stabbed", "merges": "\u0120stab bed", "count": 1511, "type": "remove by frequency"} -{"id": 38420, "token": "\u0120amusing", "merges": "\u0120am using", "count": 1511, "type": "remove by frequency"} -{"id": 21622, "token": "it\u00c3\u0142", "merges": "it \u00c3\u0142", "count": 1511, "type": "remove by frequency"} -{"id": 43260, "token": "\u00e0\u00b8\u00ab", "merges": "\u00e0\u00b8 \u00ab", "count": 1511, "type": "remove by frequency"} -{"id": 50214, "token": "\u0120Aircraft", "merges": "\u0120A ircraft", "count": 1512, "type": "remove by frequency"} -{"id": 43981, "token": "\u0120tending", "merges": "\u0120t ending", "count": 1512, "type": "remove by frequency"} -{"id": 43593, "token": "\u0120THREE", "merges": "\u0120TH REE", "count": 1512, "type": "remove by frequency"} -{"id": 3303, "token": "}$,", "merges": "}$ ,", "count": 1512, "type": "remove by frequency"} -{"id": 47330, "token": "\u0120thrilling", "merges": "\u0120thr illing", "count": 1513, "type": "remove by frequency"} -{"id": 35166, "token": "\u0120Serve", "merges": "\u0120S erve", "count": 1513, "type": "remove by frequency"} -{"id": 38355, "token": "\u0120outrageous", "merges": "\u0120outrage ous", "count": 1514, "type": "remove by frequency"} -{"id": 38871, "token": "\u0120curled", "merges": "\u0120cur led", "count": 1514, "type": "remove by frequency"} -{"id": 44992, "token": "\u0120sacks", "merges": "\u0120s acks", "count": 1514, "type": "remove by frequency"} -{"id": 39461, "token": "j\u00c3\u0142", "merges": "j \u00c3\u0142", "count": 1514, "type": "remove by frequency"} -{"id": 43154, "token": "\u0120dislocation", "merges": "\u0120dis location", "count": 1515, "type": "remove by frequency"} -{"id": 47955, "token": "\u0120mystical", "merges": "\u0120myst ical", "count": 1515, "type": "remove by frequency"} -{"id": 48972, "token": "\u0120cooks", "merges": "\u0120cook s", "count": 1515, "type": "remove by frequency"} -{"id": 21057, "token": "\u0120irradiation", "merges": "\u0120irrad iation", "count": 1516, "type": "remove by frequency"} -{"id": 46821, "token": "inities", "merges": "in ities", "count": 1516, "type": "remove by frequency"} -{"id": 50315, "token": "\u00e2\u012a\u0137", "merges": "\u00e2\u012a \u0137", "count": 1516, "type": "remove by frequency"} -{"id": 50287, "token": "\u00cb\u0132", "merges": "\u00cb \u0132", "count": 1516, "type": "remove by frequency"} -{"id": 3918, "token": "\u0120President", "merges": "\u0120Pres ident", "count": 1517, "type": "remove by frequency"} -{"id": 42708, "token": "\u0120casually", "merges": "\u0120cas ually", "count": 1517, "type": "remove by frequency"} -{"id": 35509, "token": "substituted", "merges": "sub stituted", "count": 1518, "type": "remove by frequency"} -{"id": 42497, "token": "\u0120Musk", "merges": "\u0120Mus k", "count": 1518, "type": "remove by frequency"} -{"id": 47425, "token": "\u0120sweating", "merges": "\u0120swe ating", "count": 1519, "type": "remove by frequency"} -{"id": 50136, "token": "\u0120traitor", "merges": "\u0120tra itor", "count": 1520, "type": "remove by frequency"} -{"id": 27848, "token": "\u0120Airl", "merges": "\u0120A irl", "count": 1520, "type": "remove by frequency"} -{"id": 44970, "token": "\u0120noct", "merges": "\u0120no ct", "count": 1520, "type": "remove by frequency"} -{"id": 45286, "token": "\u0120impregn", "merges": "\u0120imp regn", "count": 1521, "type": "remove by frequency"} -{"id": 30091, "token": "\u0120mening", "merges": "\u0120men ing", "count": 1521, "type": "remove by frequency"} -{"id": 26048, "token": "\u0120Sweet", "merges": "\u0120S weet", "count": 1521, "type": "remove by frequency"} -{"id": 9253, "token": "\u0120supra", "merges": "\u0120sup ra", "count": 1521, "type": "remove by frequency"} -{"id": 46740, "token": "\u0120gallon", "merges": "\u0120gall on", "count": 1522, "type": "remove by frequency"} -{"id": 49125, "token": "\u0120Heter", "merges": "\u0120H eter", "count": 1522, "type": "remove by frequency"} -{"id": 46768, "token": "'\">", "merges": "' \">", "count": 1522, "type": "remove by frequency"} -{"id": 20316, "token": "\u0120Environmental", "merges": "\u0120Environment al", "count": 1523, "type": "remove by frequency"} -{"id": 46203, "token": "\u0120dwarfs", "merges": "\u0120dwar fs", "count": 1523, "type": "remove by frequency"} -{"id": 47170, "token": "\u00c6\u00a1", "merges": "\u00c6 \u00a1", "count": 1523, "type": "remove by frequency"} -{"id": 45666, "token": "\u0120simplicial", "merges": "\u0120sim plicial", "count": 1524, "type": "remove by frequency"} -{"id": 36307, "token": "\u0120noticing", "merges": "\u0120not icing", "count": 1524, "type": "remove by frequency"} -{"id": 26051, "token": "YRIGHT", "merges": "YR IGHT", "count": 1524, "type": "remove by frequency"} -{"id": 29747, "token": "\u0120concomitant", "merges": "\u0120concomit ant", "count": 1526, "type": "remove by frequency"} -{"id": 45036, "token": "\u0120capacitors", "merges": "\u0120capac itors", "count": 1526, "type": "remove by frequency"} -{"id": 20757, "token": "\u0120Turner", "merges": "\u0120Turn er", "count": 1526, "type": "remove by frequency"} -{"id": 47257, "token": "fluoro", "merges": "flu oro", "count": 1526, "type": "remove by frequency"} -{"id": 42031, "token": "Rather", "merges": "R ather", "count": 1526, "type": "remove by frequency"} -{"id": 37742, "token": "\u0120respiration", "merges": "\u0120resp iration", "count": 1527, "type": "remove by frequency"} -{"id": 20923, "token": "\u0120Stanley", "merges": "\u0120Stan ley", "count": 1527, "type": "remove by frequency"} -{"id": 15594, "token": "\u0120Sure", "merges": "\u0120S ure", "count": 1527, "type": "remove by frequency"} -{"id": 22004, "token": "\u0120\\*\\*", "merges": "\u0120\\* \\*", "count": 1527, "type": "remove by frequency"} -{"id": 50247, "token": "itons", "merges": "it ons", "count": 1527, "type": "remove by frequency"} -{"id": 29675, "token": "omorphisms", "merges": "omorph isms", "count": 1528, "type": "remove by frequency"} -{"id": 46129, "token": "\u0120crystallization", "merges": "\u0120crystall ization", "count": 1529, "type": "remove by frequency"} -{"id": 40691, "token": "terminus", "merges": "ter minus", "count": 1529, "type": "remove by frequency"} -{"id": 41905, "token": "\u0120\u00d0\u0135", "merges": "\u0120\u00d0 \u0135", "count": 1529, "type": "remove by frequency"} -{"id": 25267, "token": "\u0120reagents", "merges": "\u0120re agents", "count": 1530, "type": "remove by frequency"} -{"id": 45963, "token": "\u0120identically", "merges": "\u0120ident ically", "count": 1531, "type": "remove by frequency"} -{"id": 27878, "token": "\u0120Academic", "merges": "\u0120Academ ic", "count": 1531, "type": "remove by frequency"} -{"id": 43714, "token": "\u0120Cheese", "merges": "\u0120Che ese", "count": 1531, "type": "remove by frequency"} -{"id": 45647, "token": "opedic", "merges": "oped ic", "count": 1531, "type": "remove by frequency"} -{"id": 46095, "token": "\u0120Ming", "merges": "\u0120M ing", "count": 1531, "type": "remove by frequency"} -{"id": 37002, "token": "\u0120seedlings", "merges": "\u0120seed lings", "count": 1532, "type": "remove by frequency"} -{"id": 39972, "token": "imester", "merges": "imes ter", "count": 1532, "type": "remove by frequency"} -{"id": 41602, "token": "ulfide", "merges": "ulf ide", "count": 1532, "type": "remove by frequency"} -{"id": 20192, "token": "\u0120pathogen", "merges": "\u0120path ogen", "count": 1533, "type": "remove by frequency"} -{"id": 46211, "token": "Verlag", "merges": "Ver lag", "count": 1534, "type": "remove by frequency"} -{"id": 47798, "token": "enties", "merges": "ent ies", "count": 1534, "type": "remove by frequency"} -{"id": 50008, "token": "\u0120ranc", "merges": "\u0120r anc", "count": 1534, "type": "remove by frequency"} -{"id": 39928, "token": "\u0120psychic", "merges": "\u0120psych ic", "count": 1535, "type": "remove by frequency"} -{"id": 34816, "token": "\u00e0\u00b8\u00a2", "merges": "\u00e0\u00b8 \u00a2", "count": 1535, "type": "remove by frequency"} -{"id": 36052, "token": "\u0120Conduct", "merges": "\u0120Con duct", "count": 1536, "type": "remove by frequency"} -{"id": 41721, "token": "\u0120Tier", "merges": "\u0120T ier", "count": 1536, "type": "remove by frequency"} -{"id": 33806, "token": "\u00c3\u0143c", "merges": "\u00c3\u0143 c", "count": 1536, "type": "remove by frequency"} -{"id": 19281, "token": "\u00c2\u00b3", "merges": "\u00c2 \u00b3", "count": 1536, "type": "remove by frequency"} -{"id": 43070, "token": "acrylate", "merges": "acry late", "count": 1537, "type": "remove by frequency"} -{"id": 42188, "token": "ophan", "merges": "oph an", "count": 1537, "type": "remove by frequency"} -{"id": 40886, "token": "\u0120concave", "merges": "\u0120conc ave", "count": 1538, "type": "remove by frequency"} -{"id": 31833, "token": "\u0120damned", "merges": "\u0120dam ned", "count": 1538, "type": "remove by frequency"} -{"id": 31698, "token": "\u00e3\u0123\u0135\u00e3\u0123\u00ae", "merges": "\u00e3\u0123\u0135 \u00e3\u0123\u00ae", "count": 1538, "type": "remove by frequency"} -{"id": 36613, "token": "brates", "merges": "br ates", "count": 1539, "type": "remove by frequency"} -{"id": 40981, "token": "\u0120drap", "merges": "\u0120d rap", "count": 1539, "type": "remove by frequency"} -{"id": 45469, "token": "\u0120Hip", "merges": "\u0120H ip", "count": 1539, "type": "remove by frequency"} -{"id": 42593, "token": "\u0120Clifford", "merges": "\u0120Cliff ord", "count": 1540, "type": "remove by frequency"} -{"id": 34565, "token": "\u0120Evil", "merges": "\u0120Ev il", "count": 1540, "type": "remove by frequency"} -{"id": 35734, "token": "\u0120IMF", "merges": "\u0120IM F", "count": 1540, "type": "remove by frequency"} -{"id": 31793, "token": "\u0120regularity", "merges": "\u0120regular ity", "count": 1541, "type": "remove by frequency"} -{"id": 37511, "token": "\u0120Friedrich", "merges": "\u0120Fried rich", "count": 1541, "type": "remove by frequency"} -{"id": 31975, "token": "stained", "merges": "st ained", "count": 1541, "type": "remove by frequency"} -{"id": 37855, "token": "partum", "merges": "part um", "count": 1541, "type": "remove by frequency"} -{"id": 48233, "token": "\u0120bully", "merges": "\u0120bul ly", "count": 1541, "type": "remove by frequency"} -{"id": 50149, "token": "\u0120tamp", "merges": "\u0120t amp", "count": 1541, "type": "remove by frequency"} -{"id": 18300, "token": "\u0120Statistical", "merges": "\u0120Stat istical", "count": 1542, "type": "remove by frequency"} -{"id": 40963, "token": "\u0120cosa", "merges": "\u0120c osa", "count": 1542, "type": "remove by frequency"} -{"id": 15904, "token": "\u0120Bureau", "merges": "\u0120B ureau", "count": 1543, "type": "remove by frequency"} -{"id": 13114, "token": "\u0120Ryan", "merges": "\u0120R yan", "count": 1543, "type": "remove by frequency"} -{"id": 29550, "token": "\u0120frowned", "merges": "\u0120f rowned", "count": 1544, "type": "remove by frequency"} -{"id": 49654, "token": "\u0120doubtless", "merges": "\u0120doubt less", "count": 1545, "type": "remove by frequency"} -{"id": 41568, "token": "ionine", "merges": "ion ine", "count": 1545, "type": "remove by frequency"} -{"id": 22707, "token": "\u0120Lisa", "merges": "\u0120L isa", "count": 1545, "type": "remove by frequency"} -{"id": 36619, "token": "\u0120Beer", "merges": "\u0120Be er", "count": 1545, "type": "remove by frequency"} -{"id": 41511, "token": "%%%%%%%%%%%%%%%%", "merges": "%%%%%%%% %%%%%%%%", "count": 1546, "type": "remove by frequency"} -{"id": 41805, "token": "\u0120Effective", "merges": "\u0120Effect ive", "count": 1546, "type": "remove by frequency"} -{"id": 44116, "token": "\u0120Cheng", "merges": "\u0120Chen g", "count": 1546, "type": "remove by frequency"} -{"id": 33218, "token": "\u0120prejudicial", "merges": "\u0120prejud icial", "count": 1547, "type": "remove by frequency"} -{"id": 49898, "token": "\u0120spines", "merges": "\u0120sp ines", "count": 1547, "type": "remove by frequency"} -{"id": 36923, "token": "\u0120Marshal", "merges": "\u0120Mars hal", "count": 1548, "type": "remove by frequency"} -{"id": 32069, "token": "\u0120Pav", "merges": "\u0120P av", "count": 1548, "type": "remove by frequency"} -{"id": 28608, "token": "\u0120Cer", "merges": "\u0120C er", "count": 1548, "type": "remove by frequency"} -{"id": 35008, "token": "\u0120mechanically", "merges": "\u0120mechan ically", "count": 1549, "type": "remove by frequency"} -{"id": 45620, "token": "\u0120tenderness", "merges": "\u0120tender ness", "count": 1549, "type": "remove by frequency"} -{"id": 49888, "token": "doctoral", "merges": "doctor al", "count": 1549, "type": "remove by frequency"} -{"id": 32420, "token": "\u0120tucked", "merges": "\u0120t ucked", "count": 1549, "type": "remove by frequency"} -{"id": 42345, "token": "\u0120skinny", "merges": "\u0120sk inny", "count": 1549, "type": "remove by frequency"} -{"id": 43974, "token": "\u0120Fruit", "merges": "\u0120F ruit", "count": 1549, "type": "remove by frequency"} -{"id": 39787, "token": "\u0120gasped", "merges": "\u0120gas ped", "count": 1550, "type": "remove by frequency"} -{"id": 38889, "token": "\u00c3\u00a9ment", "merges": "\u00c3\u00a9 ment", "count": 1550, "type": "remove by frequency"} -{"id": 44412, "token": "\u0120rumor", "merges": "\u0120rum or", "count": 1550, "type": "remove by frequency"} -{"id": 37563, "token": "\u0120autopsy", "merges": "\u0120aut opsy", "count": 1551, "type": "remove by frequency"} -{"id": 11934, "token": "\u0120malign", "merges": "\u0120mal ign", "count": 1551, "type": "remove by frequency"} -{"id": 17673, "token": "$),", "merges": "$ ),", "count": 1551, "type": "remove by frequency"} -{"id": 19295, "token": "\u0120SOFTWARE", "merges": "\u0120SO FTWARE", "count": 1552, "type": "remove by frequency"} -{"id": 37178, "token": "\u0120gelatin", "merges": "\u0120gel atin", "count": 1552, "type": "remove by frequency"} -{"id": 31345, "token": "\u0120lumbar", "merges": "\u0120lum bar", "count": 1552, "type": "remove by frequency"} -{"id": 40681, "token": "otoxic", "merges": "otox ic", "count": 1552, "type": "remove by frequency"} -{"id": 12300, "token": "\u0120Rome", "merges": "\u0120R ome", "count": 1552, "type": "remove by frequency"} -{"id": 19173, "token": "\u0120Eli", "merges": "\u0120E li", "count": 1553, "type": "remove by frequency"} -{"id": 26926, "token": "\u0120\u00c3\u00a9l", "merges": "\u0120\u00c3\u00a9 l", "count": 1553, "type": "remove by frequency"} -{"id": 35309, "token": "\u0120Gad", "merges": "\u0120G ad", "count": 1553, "type": "remove by frequency"} -{"id": 44888, "token": "\u0120924", "merges": "\u01209 24", "count": 1553, "type": "remove by frequency"} -{"id": 37268, "token": "\u0120Dear", "merges": "\u0120D ear", "count": 1554, "type": "remove by frequency"} -{"id": 48518, "token": "\u0120loaf", "merges": "\u0120lo af", "count": 1554, "type": "remove by frequency"} -{"id": 33588, "token": "\u0120piez", "merges": "\u0120pie z", "count": 1555, "type": "remove by frequency"} -{"id": 6476, "token": "\\!", "merges": "\\ !", "count": 1555, "type": "remove by frequency"} -{"id": 41626, "token": "\u0120stipulation", "merges": "\u0120stip ulation", "count": 1556, "type": "remove by frequency"} -{"id": 4935, "token": "}}(", "merges": "}} (", "count": 1556, "type": "remove by frequency"} -{"id": 33962, "token": "\u0120Soci", "merges": "\u0120S oci", "count": 1557, "type": "remove by frequency"} -{"id": 17012, "token": "\u0120Treatment", "merges": "\u0120T reatment", "count": 1558, "type": "remove by frequency"} -{"id": 43170, "token": "\u0120lettuce", "merges": "\u0120lett uce", "count": 1558, "type": "remove by frequency"} -{"id": 49329, "token": "\u0120Spons", "merges": "\u0120Sp ons", "count": 1558, "type": "remove by frequency"} -{"id": 24756, "token": "\u0120Hob", "merges": "\u0120H ob", "count": 1558, "type": "remove by frequency"} -{"id": 8293, "token": "\u0120Justice", "merges": "\u0120Just ice", "count": 1559, "type": "remove by frequency"} -{"id": 43219, "token": "\u0120rocking", "merges": "\u0120rock ing", "count": 1559, "type": "remove by frequency"} -{"id": 39565, "token": "\u0120fungus", "merges": "\u0120fun gus", "count": 1559, "type": "remove by frequency"} -{"id": 29108, "token": "\u0120glands", "merges": "\u0120gl ands", "count": 1559, "type": "remove by frequency"} -{"id": 16993, "token": "queous", "merges": "que ous", "count": 1559, "type": "remove by frequency"} -{"id": 24369, "token": "\u0120Coul", "merges": "\u0120C oul", "count": 1559, "type": "remove by frequency"} -{"id": 42185, "token": "\u0120sei", "merges": "\u0120se i", "count": 1559, "type": "remove by frequency"} -{"id": 20118, "token": "\u0120antioxidant", "merges": "\u0120antioxid ant", "count": 1560, "type": "remove by frequency"} -{"id": 28762, "token": "\u0120pleadings", "merges": "\u0120plead ings", "count": 1560, "type": "remove by frequency"} -{"id": 47378, "token": "\u0120ejected", "merges": "\u0120e jected", "count": 1560, "type": "remove by frequency"} -{"id": 28436, "token": "\u0120Bring", "merges": "\u0120B ring", "count": 1560, "type": "remove by frequency"} -{"id": 36962, "token": "\u0120superhero", "merges": "\u0120super hero", "count": 1561, "type": "remove by frequency"} -{"id": 36322, "token": "\u0120Resort", "merges": "\u0120Res ort", "count": 1561, "type": "remove by frequency"} -{"id": 21797, "token": "rosine", "merges": "ros ine", "count": 1561, "type": "remove by frequency"} -{"id": 41542, "token": "\u0120gigg", "merges": "\u0120gig g", "count": 1561, "type": "remove by frequency"} -{"id": 30825, "token": "\u0120carbohydrate", "merges": "\u0120carboh ydrate", "count": 1562, "type": "remove by frequency"} -{"id": 33200, "token": "rivol", "merges": "riv ol", "count": 1562, "type": "remove by frequency"} -{"id": 32207, "token": "\u0120ICC", "merges": "\u0120I CC", "count": 1562, "type": "remove by frequency"} -{"id": 4596, "token": "\u0120November", "merges": "\u0120N ovember", "count": 1563, "type": "remove by frequency"} -{"id": 24351, "token": "\u0120bronch", "merges": "\u0120bron ch", "count": 1563, "type": "remove by frequency"} -{"id": 40394, "token": "\u0120Workshop", "merges": "\u0120Works hop", "count": 1564, "type": "remove by frequency"} -{"id": 39784, "token": "\u0120TeV", "merges": "\u0120Te V", "count": 1564, "type": "remove by frequency"} -{"id": 46060, "token": "\u0120purpos", "merges": "\u0120pur pos", "count": 1565, "type": "remove by frequency"} -{"id": 38285, "token": "entary", "merges": "ent ary", "count": 1565, "type": "remove by frequency"} -{"id": 21832, "token": "\u0120Trail", "merges": "\u0120Tra il", "count": 1565, "type": "remove by frequency"} -{"id": 36197, "token": "\u00c3\u00a9mon", "merges": "\u00c3\u00a9 mon", "count": 1565, "type": "remove by frequency"} -{"id": 25400, "token": "ophila", "merges": "oph ila", "count": 1566, "type": "remove by frequency"} -{"id": 21088, "token": "\u0120Marie", "merges": "\u0120Mar ie", "count": 1566, "type": "remove by frequency"} -{"id": 31121, "token": "\u0120Mong", "merges": "\u0120M ong", "count": 1566, "type": "remove by frequency"} -{"id": 8454, "token": "\u0120_{", "merges": "\u0120 _{", "count": 1566, "type": "remove by frequency"} -{"id": 45942, "token": "\u0120chilling", "merges": "\u0120ch illing", "count": 1567, "type": "remove by frequency"} -{"id": 9693, "token": "\u0120Boston", "merges": "\u0120B oston", "count": 1567, "type": "remove by frequency"} -{"id": 10142, "token": "\u0120inflammatory", "merges": "\u0120infl ammatory", "count": 1568, "type": "remove by frequency"} -{"id": 36199, "token": "\u0120resemblance", "merges": "\u0120resem blance", "count": 1568, "type": "remove by frequency"} -{"id": 27000, "token": "\u0120mellitus", "merges": "\u0120mell itus", "count": 1568, "type": "remove by frequency"} -{"id": 32489, "token": "\u0120drawback", "merges": "\u0120draw back", "count": 1568, "type": "remove by frequency"} -{"id": 20399, "token": "\u0120hydroly", "merges": "\u0120hydro ly", "count": 1568, "type": "remove by frequency"} -{"id": 47145, "token": "\u0120docket", "merges": "\u0120d ocket", "count": 1569, "type": "remove by frequency"} -{"id": 44458, "token": "dling", "merges": "d ling", "count": 1569, "type": "remove by frequency"} -{"id": 31419, "token": "\u0120Mik", "merges": "\u0120M ik", "count": 1569, "type": "remove by frequency"} -{"id": 33746, "token": "\u0120Gust", "merges": "\u0120G ust", "count": 1570, "type": "remove by frequency"} -{"id": 42116, "token": "\u0120quin", "merges": "\u0120qu in", "count": 1570, "type": "remove by frequency"} -{"id": 47787, "token": "interacting", "merges": "inter acting", "count": 1571, "type": "remove by frequency"} -{"id": 35192, "token": "\u0120elicit", "merges": "\u0120el icit", "count": 1571, "type": "remove by frequency"} -{"id": 27437, "token": "\u0120sein", "merges": "\u0120se in", "count": 1571, "type": "remove by frequency"} -{"id": 43715, "token": "\u00c3\u00bct", "merges": "\u00c3\u00bc t", "count": 1571, "type": "remove by frequency"} -{"id": 32079, "token": "\u0120Industries", "merges": "\u0120Indust ries", "count": 1572, "type": "remove by frequency"} -{"id": 49195, "token": "\u0120amphib", "merges": "\u0120amph ib", "count": 1572, "type": "remove by frequency"} -{"id": 47739, "token": "\u0120Grow", "merges": "\u0120G row", "count": 1572, "type": "remove by frequency"} -{"id": 45113, "token": "\u0120comforting", "merges": "\u0120comfort ing", "count": 1573, "type": "remove by frequency"} -{"id": 46754, "token": "\u0120bouncing", "merges": "\u0120b ouncing", "count": 1573, "type": "remove by frequency"} -{"id": 39975, "token": "\u0120insufficiency", "merges": "\u0120insu fficiency", "count": 1574, "type": "remove by frequency"} -{"id": 37871, "token": "\u0120twisting", "merges": "\u0120tw isting", "count": 1574, "type": "remove by frequency"} -{"id": 47214, "token": "\u0120hunted", "merges": "\u0120h unted", "count": 1574, "type": "remove by frequency"} -{"id": 45793, "token": "\u0120cleft", "merges": "\u0120c left", "count": 1574, "type": "remove by frequency"} -{"id": 42391, "token": "\u0120alkali", "merges": "\u0120alk ali", "count": 1575, "type": "remove by frequency"} -{"id": 37322, "token": ">).", "merges": "> ).", "count": 1575, "type": "remove by frequency"} -{"id": 30972, "token": "\u0120Shen", "merges": "\u0120S hen", "count": 1576, "type": "remove by frequency"} -{"id": 43257, "token": "\u0120unfavorable", "merges": "\u0120unf avorable", "count": 1577, "type": "remove by frequency"} -{"id": 21003, "token": "\u0120Einstein", "merges": "\u0120E instein", "count": 1577, "type": "remove by frequency"} -{"id": 31841, "token": "\">&", "merges": "\"> &", "count": 1577, "type": "remove by frequency"} -{"id": 35345, "token": "\u0120Nucle", "merges": "\u0120N ucle", "count": 1578, "type": "remove by frequency"} -{"id": 42417, "token": "\u0120undercover", "merges": "\u0120under cover", "count": 1579, "type": "remove by frequency"} -{"id": 29799, "token": "\u0120encore", "merges": "\u0120enc ore", "count": 1579, "type": "remove by frequency"} -{"id": 21925, "token": "\u0120Pant", "merges": "\u0120P ant", "count": 1579, "type": "remove by frequency"} -{"id": 18773, "token": "\u0120Yan", "merges": "\u0120Y an", "count": 1579, "type": "remove by frequency"} -{"id": 11218, "token": "\u0120Championship", "merges": "\u0120Ch ampionship", "count": 1580, "type": "remove by frequency"} -{"id": 19789, "token": "\u0120metabolites", "merges": "\u0120metabol ites", "count": 1580, "type": "remove by frequency"} -{"id": 18746, "token": "\u0120STATES", "merges": "\u0120STAT ES", "count": 1580, "type": "remove by frequency"} -{"id": 42437, "token": "ructose", "merges": "ruct ose", "count": 1580, "type": "remove by frequency"} -{"id": 46531, "token": "\u0120cavern", "merges": "\u0120ca vern", "count": 1580, "type": "remove by frequency"} -{"id": 46233, "token": "\u00c4\u0133", "merges": "\u00c4 \u0133", "count": 1580, "type": "remove by frequency"} -{"id": 22695, "token": "\u0120Commons", "merges": "\u0120Comm ons", "count": 1581, "type": "remove by frequency"} -{"id": 40467, "token": "elij", "merges": "el ij", "count": 1581, "type": "remove by frequency"} -{"id": 21174, "token": "\u0120Kam", "merges": "\u0120K am", "count": 1581, "type": "remove by frequency"} -{"id": 34646, "token": "\u0120Than", "merges": "\u0120Th an", "count": 1582, "type": "remove by frequency"} -{"id": 36256, "token": "\u0120catastrophic", "merges": "\u0120catast rophic", "count": 1583, "type": "remove by frequency"} -{"id": 43859, "token": "\u0120Sheets", "merges": "\u0120She ets", "count": 1583, "type": "remove by frequency"} -{"id": 24999, "token": "\u0120Shaw", "merges": "\u0120Sh aw", "count": 1583, "type": "remove by frequency"} -{"id": 20358, "token": "\u0120Tob", "merges": "\u0120T ob", "count": 1583, "type": "remove by frequency"} -{"id": 37472, "token": "\u0120h\u00c3\u00a1", "merges": "\u0120h \u00c3\u00a1", "count": 1583, "type": "remove by frequency"} -{"id": 40704, "token": "\u0120hugely", "merges": "\u0120hug ely", "count": 1584, "type": "remove by frequency"} -{"id": 45654, "token": "\u0120intrinsically", "merges": "\u0120intrins ically", "count": 1585, "type": "remove by frequency"} -{"id": 29836, "token": "conviction", "merges": "conv iction", "count": 1585, "type": "remove by frequency"} -{"id": 34796, "token": "\u0120aerobic", "merges": "\u0120aer obic", "count": 1585, "type": "remove by frequency"} -{"id": 38102, "token": "\u0120Bean", "merges": "\u0120Be an", "count": 1585, "type": "remove by frequency"} -{"id": 35218, "token": "\u0120Dob", "merges": "\u0120D ob", "count": 1585, "type": "remove by frequency"} -{"id": 26749, "token": "\u0120renormal", "merges": "\u0120ren ormal", "count": 1586, "type": "remove by frequency"} -{"id": 24136, "token": "\u0120Workers", "merges": "\u0120Work ers", "count": 1586, "type": "remove by frequency"} -{"id": 36866, "token": "\u0120fetus", "merges": "\u0120fet us", "count": 1586, "type": "remove by frequency"} -{"id": 49755, "token": "=${", "merges": "= ${", "count": 1586, "type": "remove by frequency"} -{"id": 23983, "token": "\u0120?>", "merges": "\u0120? >", "count": 1587, "type": "remove by frequency"} -{"id": 41581, "token": "\u00e3\u0123\u013c", "merges": "\u00e3\u0123 \u013c", "count": 1587, "type": "remove by frequency"} -{"id": 28098, "token": "\u0120Bir", "merges": "\u0120B ir", "count": 1588, "type": "remove by frequency"} -{"id": 50370, "token": "\u00e2\u0138\u00bd", "merges": "\u00e2\u0138 \u00bd", "count": 1588, "type": "remove by frequency"} -{"id": 37418, "token": "\u0120Iz", "merges": "\u0120I z", "count": 1589, "type": "remove by frequency"} -{"id": 11007, "token": "\u0120Chris", "merges": "\u0120Ch ris", "count": 1590, "type": "remove by frequency"} -{"id": 48136, "token": "\u0120Evening", "merges": "\u0120Even ing", "count": 1591, "type": "remove by frequency"} -{"id": 45975, "token": "\u0120butcher", "merges": "\u0120but cher", "count": 1591, "type": "remove by frequency"} -{"id": 45027, "token": "\u0120quilt", "merges": "\u0120qu ilt", "count": 1591, "type": "remove by frequency"} -{"id": 42758, "token": "\u0120hace", "merges": "\u0120h ace", "count": 1591, "type": "remove by frequency"} -{"id": 35080, "token": "\u00c3\u00a8ne", "merges": "\u00c3\u00a8 ne", "count": 1591, "type": "remove by frequency"} -{"id": 21796, "token": "\u00c3\u00b8r", "merges": "\u00c3\u00b8 r", "count": 1591, "type": "remove by frequency"} -{"id": 4287, "token": "rightarrow", "merges": "right arrow", "count": 1592, "type": "remove by frequency"} -{"id": 45398, "token": "\u0120Fighter", "merges": "\u0120F ighter", "count": 1592, "type": "remove by frequency"} -{"id": 26064, "token": "\u0120Lud", "merges": "\u0120L ud", "count": 1592, "type": "remove by frequency"} -{"id": 47614, "token": "ellt", "merges": "ell t", "count": 1592, "type": "remove by frequency"} -{"id": 18402, "token": "\u0120Hem", "merges": "\u0120H em", "count": 1592, "type": "remove by frequency"} -{"id": 33062, "token": "\u0120Applying", "merges": "\u0120App lying", "count": 1593, "type": "remove by frequency"} -{"id": 27067, "token": "\u0120implants", "merges": "\u0120impl ants", "count": 1593, "type": "remove by frequency"} -{"id": 41238, "token": "\u0120PROF", "merges": "\u0120PR OF", "count": 1593, "type": "remove by frequency"} -{"id": 30333, "token": "\u0120functionally", "merges": "\u0120function ally", "count": 1594, "type": "remove by frequency"} -{"id": 43043, "token": "\u0120\u00d1\u0123\u00d0\u00b0\u00d0\u00bc", "merges": "\u0120\u00d1\u0123 \u00d0\u00b0\u00d0\u00bc", "count": 1594, "type": "remove by frequency"} -{"id": 36348, "token": "\u0120dyes", "merges": "\u0120d yes", "count": 1594, "type": "remove by frequency"} -{"id": 42664, "token": "\u00c3\u00bck", "merges": "\u00c3\u00bc k", "count": 1594, "type": "remove by frequency"} -{"id": 15197, "token": "\u0120Howard", "merges": "\u0120How ard", "count": 1595, "type": "remove by frequency"} -{"id": 36984, "token": "\u0120sugars", "merges": "\u0120sug ars", "count": 1595, "type": "remove by frequency"} -{"id": 45878, "token": "\u0120Productions", "merges": "\u0120Produ ctions", "count": 1596, "type": "remove by frequency"} -{"id": 34885, "token": "\u0120minimally", "merges": "\u0120minim ally", "count": 1596, "type": "remove by frequency"} -{"id": 19498, "token": "\u0120Stewart", "merges": "\u0120Ste wart", "count": 1596, "type": "remove by frequency"} -{"id": 20574, "token": "\u0120Freedom", "merges": "\u0120Fre edom", "count": 1597, "type": "remove by frequency"} -{"id": 31449, "token": "\u0120Motors", "merges": "\u0120Mot ors", "count": 1597, "type": "remove by frequency"} -{"id": 23724, "token": "\u0120Woman", "merges": "\u0120W oman", "count": 1597, "type": "remove by frequency"} -{"id": 24268, "token": "\u0120sono", "merges": "\u0120son o", "count": 1597, "type": "remove by frequency"} -{"id": 49003, "token": "\u00e2\u0122\u012e", "merges": "\u00e2\u0122 \u012e", "count": 1597, "type": "remove by frequency"} -{"id": 41411, "token": "\u00c3\u00a1c", "merges": "\u00c3\u00a1 c", "count": 1597, "type": "remove by frequency"} -{"id": 43499, "token": "\u0120VERY", "merges": "\u0120V ERY", "count": 1598, "type": "remove by frequency"} -{"id": 31106, "token": "\u0120abelian", "merges": "\u0120ab elian", "count": 1599, "type": "remove by frequency"} -{"id": 10255, "token": "\u0120Columb", "merges": "\u0120Col umb", "count": 1599, "type": "remove by frequency"} -{"id": 49943, "token": "\u0120shroud", "merges": "\u0120shr oud", "count": 1599, "type": "remove by frequency"} -{"id": 17637, "token": ")+\\", "merges": ") +\\", "count": 1599, "type": "remove by frequency"} -{"id": 39883, "token": "\u0120pounding", "merges": "\u0120p ounding", "count": 1600, "type": "remove by frequency"} -{"id": 50090, "token": "\u0120behaving", "merges": "\u0120behav ing", "count": 1600, "type": "remove by frequency"} -{"id": 50131, "token": "\u0120assortment", "merges": "\u0120assort ment", "count": 1601, "type": "remove by frequency"} -{"id": 32411, "token": "\u0120feminist", "merges": "\u0120femin ist", "count": 1601, "type": "remove by frequency"} -{"id": 43390, "token": "\u0120claws", "merges": "\u0120cl aws", "count": 1601, "type": "remove by frequency"} -{"id": 43370, "token": "\u0120guts", "merges": "\u0120gut s", "count": 1601, "type": "remove by frequency"} -{"id": 42798, "token": "\u0120unbelievable", "merges": "\u0120unbelie vable", "count": 1602, "type": "remove by frequency"} -{"id": 42115, "token": "\u0120inductive", "merges": "\u0120induct ive", "count": 1602, "type": "remove by frequency"} -{"id": 37312, "token": "\u0120vra", "merges": "\u0120v ra", "count": 1602, "type": "remove by frequency"} -{"id": 44099, "token": "\u0120incarcerated", "merges": "\u0120incarcer ated", "count": 1603, "type": "remove by frequency"} -{"id": 21525, "token": "\u0120Taking", "merges": "\u0120T aking", "count": 1603, "type": "remove by frequency"} -{"id": 40095, "token": "\u0120oxides", "merges": "\u0120ox ides", "count": 1603, "type": "remove by frequency"} -{"id": 43754, "token": "\u0120Electronics", "merges": "\u0120Electron ics", "count": 1604, "type": "remove by frequency"} -{"id": 20716, "token": "\u0120untreated", "merges": "\u0120un treated", "count": 1604, "type": "remove by frequency"} -{"id": 31672, "token": "\u0120cathode", "merges": "\u0120cath ode", "count": 1604, "type": "remove by frequency"} -{"id": 27361, "token": "\u0120cohorts", "merges": "\u0120coh orts", "count": 1604, "type": "remove by frequency"} -{"id": 19547, "token": "\u0120Sams", "merges": "\u0120S ams", "count": 1604, "type": "remove by frequency"} -{"id": 46197, "token": "\u0120paranoid", "merges": "\u0120paran oid", "count": 1605, "type": "remove by frequency"} -{"id": 47971, "token": "chst", "merges": "ch st", "count": 1605, "type": "remove by frequency"} -{"id": 13626, "token": "\u0120PBS", "merges": "\u0120P BS", "count": 1605, "type": "remove by frequency"} -{"id": 45402, "token": "\u0120scratched", "merges": "\u0120scr atched", "count": 1606, "type": "remove by frequency"} -{"id": 41640, "token": "\u0120Liquid", "merges": "\u0120Liqu id", "count": 1606, "type": "remove by frequency"} -{"id": 23068, "token": "\u00e0\u00a4\u00a8", "merges": "\u00e0\u00a4 \u00a8", "count": 1606, "type": "remove by frequency"} -{"id": 45646, "token": "\u0120politely", "merges": "\u0120polit ely", "count": 1607, "type": "remove by frequency"} -{"id": 31075, "token": "\u0120Cultural", "merges": "\u0120C ultural", "count": 1607, "type": "remove by frequency"} -{"id": 46086, "token": "\u0120experimenting", "merges": "\u0120experiment ing", "count": 1608, "type": "remove by frequency"} -{"id": 25535, "token": "\u0120spectrometry", "merges": "\u0120spectrom etry", "count": 1608, "type": "remove by frequency"} -{"id": 35424, "token": "\u0120ging", "merges": "\u0120g ing", "count": 1608, "type": "remove by frequency"} -{"id": 12272, "token": "\u00e1\u00bc", "merges": "\u00e1 \u00bc", "count": 1608, "type": "remove by frequency"} -{"id": 41702, "token": "\u0120Reality", "merges": "\u0120Re ality", "count": 1609, "type": "remove by frequency"} -{"id": 24744, "token": "\u0120Really", "merges": "\u0120Re ally", "count": 1609, "type": "remove by frequency"} -{"id": 42447, "token": "ycler", "merges": "ycl er", "count": 1609, "type": "remove by frequency"} -{"id": 33393, "token": "\u0120Securities", "merges": "\u0120Sec urities", "count": 1610, "type": "remove by frequency"} -{"id": 30245, "token": "\u0120conceded", "merges": "\u0120conced ed", "count": 1610, "type": "remove by frequency"} -{"id": 13151, "token": "othelial", "merges": "ot helial", "count": 1610, "type": "remove by frequency"} -{"id": 13717, "token": "\u0120marijuana", "merges": "\u0120m arijuana", "count": 1611, "type": "remove by frequency"} -{"id": 47947, "token": "\u0120hallmark", "merges": "\u0120hall mark", "count": 1611, "type": "remove by frequency"} -{"id": 45628, "token": "\u0120occupant", "merges": "\u0120occup ant", "count": 1611, "type": "remove by frequency"} -{"id": 44969, "token": "\u0120Mega", "merges": "\u0120M ega", "count": 1611, "type": "remove by frequency"} -{"id": 32131, "token": "'}\\", "merges": "' }\\", "count": 1611, "type": "remove by frequency"} -{"id": 37643, "token": "\u0120immer", "merges": "\u0120im mer", "count": 1612, "type": "remove by frequency"} -{"id": 47599, "token": "itum", "merges": "it um", "count": 1612, "type": "remove by frequency"} -{"id": 40296, "token": "\u0120semic", "merges": "\u0120sem ic", "count": 1613, "type": "remove by frequency"} -{"id": 27884, "token": "\u0120s\u00c3\u00b3", "merges": "\u0120s \u00c3\u00b3", "count": 1614, "type": "remove by frequency"} -{"id": 25958, "token": "\u0120Bak", "merges": "\u0120B ak", "count": 1614, "type": "remove by frequency"} -{"id": 46616, "token": "\u0120silenced", "merges": "\u0120sil enced", "count": 1615, "type": "remove by frequency"} -{"id": 28006, "token": "\u0120idiot", "merges": "\u0120id iot", "count": 1615, "type": "remove by frequency"} -{"id": 47274, "token": "\u0120bum", "merges": "\u0120b um", "count": 1615, "type": "remove by frequency"} -{"id": 47148, "token": "\u0120zal", "merges": "\u0120z al", "count": 1615, "type": "remove by frequency"} -{"id": 34723, "token": "\u0120Published", "merges": "\u0120P ublished", "count": 1616, "type": "remove by frequency"} -{"id": 28643, "token": "aryngeal", "merges": "arynge al", "count": 1616, "type": "remove by frequency"} -{"id": 5396, "token": "\u0120Indian", "merges": "\u0120Ind ian", "count": 1616, "type": "remove by frequency"} -{"id": 49514, "token": "ocious", "merges": "oc ious", "count": 1617, "type": "remove by frequency"} -{"id": 47765, "token": "\u0120stunt", "merges": "\u0120st unt", "count": 1617, "type": "remove by frequency"} -{"id": 45718, "token": "\u0120wsp", "merges": "\u0120w sp", "count": 1617, "type": "remove by frequency"} -{"id": 20809, "token": "bmatrix", "merges": "b matrix", "count": 1618, "type": "remove by frequency"} -{"id": 45969, "token": "autre", "merges": "aut re", "count": 1618, "type": "remove by frequency"} -{"id": 43081, "token": "\u0120disruptive", "merges": "\u0120disrupt ive", "count": 1619, "type": "remove by frequency"} -{"id": 42943, "token": "\u0120Defender", "merges": "\u0120Def ender", "count": 1619, "type": "remove by frequency"} -{"id": 45361, "token": "\u0120killers", "merges": "\u0120kill ers", "count": 1619, "type": "remove by frequency"} -{"id": 14468, "token": "\u0120Brian", "merges": "\u0120B rian", "count": 1619, "type": "remove by frequency"} -{"id": 49204, "token": "\u0120defeats", "merges": "\u0120defe ats", "count": 1620, "type": "remove by frequency"} -{"id": 22456, "token": "\u0120Rogers", "merges": "\u0120Rog ers", "count": 1620, "type": "remove by frequency"} -{"id": 27063, "token": "suppl", "merges": "supp l", "count": 1620, "type": "remove by frequency"} -{"id": 24839, "token": "\u0120Ton", "merges": "\u0120T on", "count": 1620, "type": "remove by frequency"} -{"id": 31645, "token": "\u0120Shi", "merges": "\u0120Sh i", "count": 1620, "type": "remove by frequency"} -{"id": 45425, "token": "\u0120propagator", "merges": "\u0120propag ator", "count": 1621, "type": "remove by frequency"} -{"id": 27110, "token": "\u0120asymmetry", "merges": "\u0120asym metry", "count": 1621, "type": "remove by frequency"} -{"id": 20068, "token": "\u0120amplified", "merges": "\u0120ampl ified", "count": 1622, "type": "remove by frequency"} -{"id": 36187, "token": "\u0120chees", "merges": "\u0120che es", "count": 1622, "type": "remove by frequency"} -{"id": 47498, "token": "\u0120deity", "merges": "\u0120de ity", "count": 1622, "type": "remove by frequency"} -{"id": 14961, "token": "\u0120myocard", "merges": "\u0120my ocard", "count": 1623, "type": "remove by frequency"} -{"id": 25519, "token": "\u0120reagent", "merges": "\u0120re agent", "count": 1623, "type": "remove by frequency"} -{"id": 43605, "token": "\u0120Virt", "merges": "\u0120V irt", "count": 1623, "type": "remove by frequency"} -{"id": 37811, "token": "\u0120Away", "merges": "\u0120A way", "count": 1623, "type": "remove by frequency"} -{"id": 43347, "token": "\u0120hors", "merges": "\u0120h ors", "count": 1623, "type": "remove by frequency"} -{"id": 25783, "token": "\u0120isomorphic", "merges": "\u0120is omorphic", "count": 1624, "type": "remove by frequency"} -{"id": 26893, "token": "\u0120iconic", "merges": "\u0120icon ic", "count": 1624, "type": "remove by frequency"} -{"id": 27808, "token": "\u0120Pour", "merges": "\u0120P our", "count": 1624, "type": "remove by frequency"} -{"id": 18820, "token": "\u0120antibiotic", "merges": "\u0120antib iotic", "count": 1625, "type": "remove by frequency"} -{"id": 25609, "token": "oresis", "merges": "ores is", "count": 1625, "type": "remove by frequency"} -{"id": 29632, "token": "\u0120WHAT", "merges": "\u0120WH AT", "count": 1626, "type": "remove by frequency"} -{"id": 20781, "token": "\u0120k\u00c3\u00b6", "merges": "\u0120k \u00c3\u00b6", "count": 1626, "type": "remove by frequency"} -{"id": 32415, "token": "\u0120Basically", "merges": "\u0120Bas ically", "count": 1628, "type": "remove by frequency"} -{"id": 5759, "token": "\u0120Congress", "merges": "\u0120Cong ress", "count": 1628, "type": "remove by frequency"} -{"id": 18410, "token": "\u0120Maj", "merges": "\u0120M aj", "count": 1628, "type": "remove by frequency"} -{"id": 47892, "token": "\u0120cynical", "merges": "\u0120cyn ical", "count": 1629, "type": "remove by frequency"} -{"id": 19048, "token": "\u0120VII", "merges": "\u0120V II", "count": 1629, "type": "remove by frequency"} -{"id": 32943, "token": "\u0120hospitalized", "merges": "\u0120hospital ized", "count": 1630, "type": "remove by frequency"} -{"id": 39554, "token": "\u0120Criteria", "merges": "\u0120Crit eria", "count": 1630, "type": "remove by frequency"} -{"id": 48009, "token": "\u0120impulses", "merges": "\u0120imp ulses", "count": 1630, "type": "remove by frequency"} -{"id": 26168, "token": "omeric", "merges": "om eric", "count": 1630, "type": "remove by frequency"} -{"id": 7048, "token": "\u0120appellant", "merges": "\u0120app ellant", "count": 1631, "type": "remove by frequency"} -{"id": 26755, "token": "\u0120Places", "merges": "\u0120Pl aces", "count": 1631, "type": "remove by frequency"} -{"id": 21730, "token": "\u0120schizophren", "merges": "\u0120sch izophren", "count": 1632, "type": "remove by frequency"} -{"id": 31615, "token": "plectic", "merges": "ple ctic", "count": 1632, "type": "remove by frequency"} -{"id": 49365, "token": "\u0120Xavier", "merges": "\u0120X avier", "count": 1632, "type": "remove by frequency"} -{"id": 46908, "token": "\u0120unbe", "merges": "\u0120un be", "count": 1632, "type": "remove by frequency"} -{"id": 24941, "token": "\u0120Kur", "merges": "\u0120K ur", "count": 1632, "type": "remove by frequency"} -{"id": 35679, "token": "accharides", "merges": "acchar ides", "count": 1633, "type": "remove by frequency"} -{"id": 39561, "token": "\u0120Borg", "merges": "\u0120B org", "count": 1633, "type": "remove by frequency"} -{"id": 24537, "token": "\u0120innate", "merges": "\u0120inn ate", "count": 1634, "type": "remove by frequency"} -{"id": 38869, "token": "aldehyde", "merges": "al dehyde", "count": 1635, "type": "remove by frequency"} -{"id": 39825, "token": "\u0120ventil", "merges": "\u0120vent il", "count": 1635, "type": "remove by frequency"} -{"id": 47465, "token": "\u0120darkened", "merges": "\u0120dark ened", "count": 1636, "type": "remove by frequency"} -{"id": 47519, "token": "\u00e3\u0124\u00bf\u00e3\u0125\u00bc", "merges": "\u00e3\u0124\u00bf \u00e3\u0125\u00bc", "count": 1636, "type": "remove by frequency"} -{"id": 22662, "token": "\u0120Marc", "merges": "\u0120Mar c", "count": 1636, "type": "remove by frequency"} -{"id": 21561, "token": "\u0120Discussion", "merges": "\u0120D iscussion", "count": 1637, "type": "remove by frequency"} -{"id": 28741, "token": "\u0120intestine", "merges": "\u0120int estine", "count": 1637, "type": "remove by frequency"} -{"id": 43252, "token": "\u0120Kindle", "merges": "\u0120K indle", "count": 1637, "type": "remove by frequency"} -{"id": 16982, "token": "\u0120Nich", "merges": "\u0120N ich", "count": 1637, "type": "remove by frequency"} -{"id": 12743, "token": "\u0120Officer", "merges": "\u0120Offic er", "count": 1638, "type": "remove by frequency"} -{"id": 46677, "token": "alanine", "merges": "al anine", "count": 1638, "type": "remove by frequency"} -{"id": 44102, "token": "\u0120Mining", "merges": "\u0120M ining", "count": 1638, "type": "remove by frequency"} -{"id": 28376, "token": "\u0120preg", "merges": "\u0120pre g", "count": 1638, "type": "remove by frequency"} -{"id": 41059, "token": "\u0120NEWS", "merges": "\u0120NE WS", "count": 1638, "type": "remove by frequency"} -{"id": 40393, "token": "\u0120Acid", "merges": "\u0120Ac id", "count": 1638, "type": "remove by frequency"} -{"id": 47485, "token": "\u0120fashioned", "merges": "\u0120fashion ed", "count": 1639, "type": "remove by frequency"} -{"id": 30179, "token": "\u0120singularity", "merges": "\u0120singular ity", "count": 1640, "type": "remove by frequency"} -{"id": 46377, "token": "\u0120Wilhelm", "merges": "\u0120Wil helm", "count": 1640, "type": "remove by frequency"} -{"id": 34903, "token": "\u0120blinded", "merges": "\u0120blind ed", "count": 1640, "type": "remove by frequency"} -{"id": 27818, "token": "\u0120Vas", "merges": "\u0120V as", "count": 1641, "type": "remove by frequency"} -{"id": 35027, "token": "\u0120spokeswoman", "merges": "\u0120spokes woman", "count": 1642, "type": "remove by frequency"} -{"id": 38197, "token": "\u0120stimulates", "merges": "\u0120stimul ates", "count": 1642, "type": "remove by frequency"} -{"id": 45853, "token": "\u0120brushing", "merges": "\u0120br ushing", "count": 1642, "type": "remove by frequency"} -{"id": 40131, "token": "\u0120intron", "merges": "\u0120int ron", "count": 1642, "type": "remove by frequency"} -{"id": 19790, "token": "\u0120NMR", "merges": "\u0120N MR", "count": 1642, "type": "remove by frequency"} -{"id": 46123, "token": "ensibly", "merges": "ens ibly", "count": 1643, "type": "remove by frequency"} -{"id": 22976, "token": "\u0120Fran", "merges": "\u0120Fr an", "count": 1643, "type": "remove by frequency"} -{"id": 26184, "token": "\u0120Kid", "merges": "\u0120K id", "count": 1643, "type": "remove by frequency"} -{"id": 42162, "token": "\u0120associative", "merges": "\u0120associ ative", "count": 1644, "type": "remove by frequency"} -{"id": 46903, "token": "\u0120slippery", "merges": "\u0120slipp ery", "count": 1644, "type": "remove by frequency"} -{"id": 47280, "token": "\u0120Funk", "merges": "\u0120F unk", "count": 1644, "type": "remove by frequency"} -{"id": 24522, "token": "\u0120allege", "merges": "\u0120al lege", "count": 1645, "type": "remove by frequency"} -{"id": 30294, "token": "\u0120retrospect", "merges": "\u0120retros pect", "count": 1647, "type": "remove by frequency"} -{"id": 39571, "token": "\u0120Dong", "merges": "\u0120D ong", "count": 1647, "type": "remove by frequency"} -{"id": 36937, "token": "\u00e1\u00bb\u013b", "merges": "\u00e1\u00bb \u013b", "count": 1647, "type": "remove by frequency"} -{"id": 47790, "token": "\u0120corroborated", "merges": "\u0120corrobor ated", "count": 1648, "type": "remove by frequency"} -{"id": 38222, "token": "\u0120venom", "merges": "\u0120ven om", "count": 1649, "type": "remove by frequency"} -{"id": 26917, "token": "Enron", "merges": "En ron", "count": 1649, "type": "remove by frequency"} -{"id": 36039, "token": "\u0120interplay", "merges": "\u0120inter play", "count": 1650, "type": "remove by frequency"} -{"id": 37076, "token": "\u0120Issues", "merges": "\u0120Iss ues", "count": 1650, "type": "remove by frequency"} -{"id": 39362, "token": "\u0120mesmo", "merges": "\u0120mes mo", "count": 1650, "type": "remove by frequency"} -{"id": 41541, "token": "\u0120Achie", "merges": "\u0120A chie", "count": 1650, "type": "remove by frequency"} -{"id": 48580, "token": "\u0120soot", "merges": "\u0120so ot", "count": 1650, "type": "remove by frequency"} -{"id": 35075, "token": "\u0120Orche", "merges": "\u0120Or che", "count": 1651, "type": "remove by frequency"} -{"id": 15500, "token": "\u00c2\u00af", "merges": "\u00c2 \u00af", "count": 1651, "type": "remove by frequency"} -{"id": 27103, "token": "\u0120_________________", "merges": "\u0120_ ________________", "count": 1652, "type": "remove by frequency"} -{"id": 44659, "token": "\u0120froze", "merges": "\u0120fro ze", "count": 1652, "type": "remove by frequency"} -{"id": 47388, "token": "iably", "merges": "i ably", "count": 1652, "type": "remove by frequency"} -{"id": 11910, "token": "\u00ce\u00b5\u00ce\u00b9", "merges": "\u00ce\u00b5 \u00ce\u00b9", "count": 1652, "type": "remove by frequency"} -{"id": 24990, "token": "\u00d7\u00aa", "merges": "\u00d7 \u00aa", "count": 1652, "type": "remove by frequency"} -{"id": 48134, "token": "\u0120snatched", "merges": "\u0120sn atched", "count": 1653, "type": "remove by frequency"} -{"id": 45725, "token": "okrat", "merges": "ok rat", "count": 1653, "type": "remove by frequency"} -{"id": 41587, "token": "\u0120erroneously", "merges": "\u0120errone ously", "count": 1654, "type": "remove by frequency"} -{"id": 46727, "token": "\u0120painfully", "merges": "\u0120pain fully", "count": 1654, "type": "remove by frequency"} -{"id": 28062, "token": "eanor", "merges": "ean or", "count": 1654, "type": "remove by frequency"} -{"id": 46127, "token": "\u0120assassin", "merges": "\u0120assass in", "count": 1655, "type": "remove by frequency"} -{"id": 29931, "token": "\u0120epilepsy", "merges": "\u0120epile psy", "count": 1656, "type": "remove by frequency"} -{"id": 47496, "token": "\u0120Aster", "merges": "\u0120A ster", "count": 1656, "type": "remove by frequency"} -{"id": 50122, "token": "\u0120IIS", "merges": "\u0120I IS", "count": 1656, "type": "remove by frequency"} -{"id": 30383, "token": "\u0120Opera", "merges": "\u0120Oper a", "count": 1657, "type": "remove by frequency"} -{"id": 38790, "token": "\u0120prz", "merges": "\u0120pr z", "count": 1657, "type": "remove by frequency"} -{"id": 17802, "token": "\u0120Winter", "merges": "\u0120W inter", "count": 1658, "type": "remove by frequency"} -{"id": 44494, "token": "\u0120warmed", "merges": "\u0120war med", "count": 1658, "type": "remove by frequency"} -{"id": 47635, "token": "\u0120Maxim", "merges": "\u0120Max im", "count": 1660, "type": "remove by frequency"} -{"id": 34858, "token": "\u0120Fc", "merges": "\u0120F c", "count": 1660, "type": "remove by frequency"} -{"id": 46650, "token": "rospective", "merges": "ros pective", "count": 1661, "type": "remove by frequency"} -{"id": 45650, "token": "\u0120obscured", "merges": "\u0120obsc ured", "count": 1661, "type": "remove by frequency"} -{"id": 40293, "token": "\u0120Cros", "merges": "\u0120C ros", "count": 1661, "type": "remove by frequency"} -{"id": 38654, "token": "\u0120Transcript", "merges": "\u0120Trans cript", "count": 1662, "type": "remove by frequency"} -{"id": 32008, "token": "\u0120Sax", "merges": "\u0120S ax", "count": 1662, "type": "remove by frequency"} -{"id": 45394, "token": "\u0120bounced", "merges": "\u0120b ounced", "count": 1663, "type": "remove by frequency"} -{"id": 36216, "token": "\u0120Triple", "merges": "\u0120Tri ple", "count": 1663, "type": "remove by frequency"} -{"id": 15359, "token": "\u0120Barn", "merges": "\u0120B arn", "count": 1663, "type": "remove by frequency"} -{"id": 31749, "token": "\u0120Gaz", "merges": "\u0120G az", "count": 1663, "type": "remove by frequency"} -{"id": 43733, "token": "\u0120magnets", "merges": "\u0120magn ets", "count": 1664, "type": "remove by frequency"} -{"id": 23626, "token": "\u0120Beyond", "merges": "\u0120Be yond", "count": 1664, "type": "remove by frequency"} -{"id": 49110, "token": "\u0120vener", "merges": "\u0120v ener", "count": 1664, "type": "remove by frequency"} -{"id": 39934, "token": "\u0120constitutive", "merges": "\u0120constit utive", "count": 1666, "type": "remove by frequency"} -{"id": 43486, "token": "\u0120sweetness", "merges": "\u0120sweet ness", "count": 1666, "type": "remove by frequency"} -{"id": 49198, "token": "\u0120assail", "merges": "\u0120ass ail", "count": 1666, "type": "remove by frequency"} -{"id": 23060, "token": "\u0120Nord", "merges": "\u0120N ord", "count": 1666, "type": "remove by frequency"} -{"id": 39713, "token": "\u0120conductors", "merges": "\u0120conduct ors", "count": 1667, "type": "remove by frequency"} -{"id": 37030, "token": "romycin", "merges": "romy cin", "count": 1667, "type": "remove by frequency"} -{"id": 23637, "token": "PLIED", "merges": "PL IED", "count": 1667, "type": "remove by frequency"} -{"id": 44409, "token": "\u0120turf", "merges": "\u0120tur f", "count": 1667, "type": "remove by frequency"} -{"id": 43178, "token": "\u0120LW", "merges": "\u0120L W", "count": 1667, "type": "remove by frequency"} -{"id": 40483, "token": "\u0120troph", "merges": "\u0120t roph", "count": 1668, "type": "remove by frequency"} -{"id": 47867, "token": "\u0120Ajax", "merges": "\u0120A jax", "count": 1668, "type": "remove by frequency"} -{"id": 20754, "token": "\u0120Cold", "merges": "\u0120C old", "count": 1669, "type": "remove by frequency"} -{"id": 29144, "token": "\u0120typeof", "merges": "\u0120type of", "count": 1670, "type": "remove by frequency"} -{"id": 22496, "token": "\u0120gastrointestinal", "merges": "\u0120gastro intestinal", "count": 1672, "type": "remove by frequency"} -{"id": 45350, "token": "\u0120arsenic", "merges": "\u0120arsen ic", "count": 1672, "type": "remove by frequency"} -{"id": 31929, "token": "\u0120indist", "merges": "\u0120ind ist", "count": 1672, "type": "remove by frequency"} -{"id": 19029, "token": "lament", "merges": "l ament", "count": 1672, "type": "remove by frequency"} -{"id": 25150, "token": "\u0120Pun", "merges": "\u0120P un", "count": 1672, "type": "remove by frequency"} -{"id": 42631, "token": "\u0120Rolling", "merges": "\u0120Rol ling", "count": 1673, "type": "remove by frequency"} -{"id": 46226, "token": "\u0120CIT", "merges": "\u0120C IT", "count": 1673, "type": "remove by frequency"} -{"id": 47471, "token": "\u0120allergies", "merges": "\u0120allerg ies", "count": 1674, "type": "remove by frequency"} -{"id": 30443, "token": "\u0120grinned", "merges": "\u0120gr inned", "count": 1674, "type": "remove by frequency"} -{"id": 22725, "token": "\u0120arXiv", "merges": "\u0120ar Xiv", "count": 1674, "type": "remove by frequency"} -{"id": 39279, "token": "\u0120WI", "merges": "\u0120W I", "count": 1674, "type": "remove by frequency"} -{"id": 29620, "token": "\u00d8\u00b2", "merges": "\u00d8 \u00b2", "count": 1674, "type": "remove by frequency"} -{"id": 26559, "token": "Illustration", "merges": "Ill ustration", "count": 1675, "type": "remove by frequency"} -{"id": 48575, "token": "\u0120ISP", "merges": "\u0120IS P", "count": 1676, "type": "remove by frequency"} -{"id": 15175, "token": "\u00c3\u00a4\u00c3\u00a4", "merges": "\u00c3\u00a4 \u00c3\u00a4", "count": 1676, "type": "remove by frequency"} -{"id": 41058, "token": "\u0120Sprint", "merges": "\u0120S print", "count": 1677, "type": "remove by frequency"} -{"id": 33693, "token": "\u0120poder", "merges": "\u0120pod er", "count": 1677, "type": "remove by frequency"} -{"id": 46852, "token": "\u0120randomization", "merges": "\u0120random ization", "count": 1679, "type": "remove by frequency"} -{"id": 48867, "token": "\u0120intimately", "merges": "\u0120int imately", "count": 1679, "type": "remove by frequency"} -{"id": 24022, "token": "\u0120embryonic", "merges": "\u0120embry onic", "count": 1680, "type": "remove by frequency"} -{"id": 25027, "token": "\u0120parasite", "merges": "\u0120paras ite", "count": 1680, "type": "remove by frequency"} -{"id": 20728, "token": "ophagy", "merges": "ophag y", "count": 1680, "type": "remove by frequency"} -{"id": 7705, "token": "\u0120FIG", "merges": "\u0120F IG", "count": 1680, "type": "remove by frequency"} -{"id": 46942, "token": "\u0120Tod", "merges": "\u0120T od", "count": 1680, "type": "remove by frequency"} -{"id": 37088, "token": "\u00e1\u00ba\u00a5", "merges": "\u00e1\u00ba \u00a5", "count": 1681, "type": "remove by frequency"} -{"id": 31191, "token": "\u0120electrolyte", "merges": "\u0120electroly te", "count": 1682, "type": "remove by frequency"} -{"id": 43604, "token": "\u0120accompanies", "merges": "\u0120accompan ies", "count": 1682, "type": "remove by frequency"} -{"id": 41275, "token": "\u0120Integrated", "merges": "\u0120Integr ated", "count": 1682, "type": "remove by frequency"} -{"id": 40178, "token": "\u0120gamers", "merges": "\u0120gam ers", "count": 1682, "type": "remove by frequency"} -{"id": 46038, "token": "\u0120essa", "merges": "\u0120ess a", "count": 1682, "type": "remove by frequency"} -{"id": 40878, "token": "\u0120contradicts", "merges": "\u0120contradict s", "count": 1683, "type": "remove by frequency"} -{"id": 22499, "token": "\u0120Fermi", "merges": "\u0120Ferm i", "count": 1683, "type": "remove by frequency"} -{"id": 49517, "token": "\u0120MSD", "merges": "\u0120M SD", "count": 1683, "type": "remove by frequency"} -{"id": 34954, "token": "$=", "merges": "$ =", "count": 1683, "type": "remove by frequency"} -{"id": 22394, "token": "\u0120cryptocur", "merges": "\u0120crypt ocur", "count": 1684, "type": "remove by frequency"} -{"id": 49401, "token": "etine", "merges": "et ine", "count": 1684, "type": "remove by frequency"} -{"id": 21677, "token": "\u00d7\u0132", "merges": "\u00d7 \u0132", "count": 1684, "type": "remove by frequency"} -{"id": 35196, "token": "\u0120activator", "merges": "\u0120activ ator", "count": 1685, "type": "remove by frequency"} -{"id": 47753, "token": "\u0120remorse", "merges": "\u0120rem orse", "count": 1685, "type": "remove by frequency"} -{"id": 46348, "token": "pathetic", "merges": "pat hetic", "count": 1685, "type": "remove by frequency"} -{"id": 44469, "token": "otherap", "merges": "othe rap", "count": 1685, "type": "remove by frequency"} -{"id": 46278, "token": "\u0120RTC", "merges": "\u0120R TC", "count": 1685, "type": "remove by frequency"} -{"id": 37753, "token": "))=", "merges": ")) =", "count": 1685, "type": "remove by frequency"} -{"id": 5774, "token": ".](", "merges": ". ](", "count": 1685, "type": "remove by frequency"} -{"id": 35755, "token": "\u0120distilled", "merges": "\u0120dist illed", "count": 1686, "type": "remove by frequency"} -{"id": 24892, "token": "\u0120immob", "merges": "\u0120imm ob", "count": 1686, "type": "remove by frequency"} -{"id": 21469, "token": "\u00c2\u0142\u00c2\u0142\u00c2\u0142", "merges": "\u00c2\u0142\u00c2\u0142 \u00c2\u0142", "count": 1686, "type": "remove by frequency"} -{"id": 21073, "token": "orectal", "merges": "orect al", "count": 1687, "type": "remove by frequency"} -{"id": 34079, "token": "\u0120inmate", "merges": "\u0120in mate", "count": 1687, "type": "remove by frequency"} -{"id": 23715, "token": ".\");", "merges": ".\" );", "count": 1687, "type": "remove by frequency"} -{"id": 47191, "token": "\u0120looming", "merges": "\u0120lo oming", "count": 1688, "type": "remove by frequency"} -{"id": 6445, "token": "\u0120Jes", "merges": "\u0120J es", "count": 1688, "type": "remove by frequency"} -{"id": 21352, "token": "\u0120Civ", "merges": "\u0120C iv", "count": 1688, "type": "remove by frequency"} -{"id": 19809, "token": "\u0120EPA", "merges": "\u0120E PA", "count": 1688, "type": "remove by frequency"} -{"id": 39761, "token": "\u0120Lun", "merges": "\u0120L un", "count": 1688, "type": "remove by frequency"} -{"id": 31349, "token": "\u0120Reviews", "merges": "\u0120Review s", "count": 1689, "type": "remove by frequency"} -{"id": 43360, "token": "\u0120GOD", "merges": "\u0120G OD", "count": 1689, "type": "remove by frequency"} -{"id": 30412, "token": "\u0120.\\", "merges": "\u0120. \\", "count": 1689, "type": "remove by frequency"} -{"id": 46387, "token": "\u0120elbows", "merges": "\u0120el bows", "count": 1690, "type": "remove by frequency"} -{"id": 15908, "token": "\u0120Hur", "merges": "\u0120H ur", "count": 1690, "type": "remove by frequency"} -{"id": 28300, "token": "\u0120Pom", "merges": "\u0120P om", "count": 1690, "type": "remove by frequency"} -{"id": 31907, "token": "\u0120teammates", "merges": "\u0120team mates", "count": 1691, "type": "remove by frequency"} -{"id": 41767, "token": "\u0120Levels", "merges": "\u0120Level s", "count": 1691, "type": "remove by frequency"} -{"id": 27922, "token": "\u0120Near", "merges": "\u0120N ear", "count": 1691, "type": "remove by frequency"} -{"id": 35193, "token": "\u0120Feel", "merges": "\u0120Fe el", "count": 1691, "type": "remove by frequency"} -{"id": 18089, "token": "\u0120Antonio", "merges": "\u0120Anton io", "count": 1692, "type": "remove by frequency"} -{"id": 42626, "token": "\u0120gaseous", "merges": "\u0120g aseous", "count": 1692, "type": "remove by frequency"} -{"id": 36192, "token": "\u0120SCC", "merges": "\u0120S CC", "count": 1692, "type": "remove by frequency"} -{"id": 32737, "token": "\u0120Gel", "merges": "\u0120G el", "count": 1692, "type": "remove by frequency"} -{"id": 20770, "token": "\u00d7\u013e", "merges": "\u00d7 \u013e", "count": 1692, "type": "remove by frequency"} -{"id": 35142, "token": "\u0120pancreas", "merges": "\u0120pan creas", "count": 1693, "type": "remove by frequency"} -{"id": 44628, "token": "\u0120APR", "merges": "\u0120A PR", "count": 1693, "type": "remove by frequency"} -{"id": 41343, "token": "\u0120SED", "merges": "\u0120S ED", "count": 1693, "type": "remove by frequency"} -{"id": 48635, "token": "\u0120grasping", "merges": "\u0120grasp ing", "count": 1694, "type": "remove by frequency"} -{"id": 36025, "token": "\u0120liberals", "merges": "\u0120liber als", "count": 1694, "type": "remove by frequency"} -{"id": 36189, "token": "\u0120Mood", "merges": "\u0120M ood", "count": 1694, "type": "remove by frequency"} -{"id": 31910, "token": "\u0120Lay", "merges": "\u0120L ay", "count": 1694, "type": "remove by frequency"} -{"id": 7415, "token": ")^{", "merges": ") ^{", "count": 1694, "type": "remove by frequency"} -{"id": 20560, "token": "\u0120subunit", "merges": "\u0120sub unit", "count": 1695, "type": "remove by frequency"} -{"id": 50330, "token": "\u00e2\u012b\u00a7", "merges": "\u00e2\u012b \u00a7", "count": 1695, "type": "remove by frequency"} -{"id": 35617, "token": "\u0120vacated", "merges": "\u0120vac ated", "count": 1696, "type": "remove by frequency"} -{"id": 22463, "token": "\u0120Robin", "merges": "\u0120Rob in", "count": 1696, "type": "remove by frequency"} -{"id": 37024, "token": "\u0120Huang", "merges": "\u0120Hu ang", "count": 1696, "type": "remove by frequency"} -{"id": 49627, "token": "\u0120Customers", "merges": "\u0120Custom ers", "count": 1697, "type": "remove by frequency"} -{"id": 48423, "token": "\u0120fiercely", "merges": "\u0120fierc ely", "count": 1697, "type": "remove by frequency"} -{"id": 27936, "token": "\u0120Cris", "merges": "\u0120C ris", "count": 1697, "type": "remove by frequency"} -{"id": 25870, "token": "\u0120Gent", "merges": "\u0120G ent", "count": 1697, "type": "remove by frequency"} -{"id": 26805, "token": "\u00c3\u00a1r", "merges": "\u00c3\u00a1 r", "count": 1697, "type": "remove by frequency"} -{"id": 42721, "token": "\u0120vaccinated", "merges": "\u0120vacc inated", "count": 1698, "type": "remove by frequency"} -{"id": 40134, "token": "\u00da\u00af", "merges": "\u00da \u00af", "count": 1698, "type": "remove by frequency"} -{"id": 39646, "token": "StackTrace", "merges": "Stack Trace", "count": 1699, "type": "remove by frequency"} -{"id": 22542, "token": "\u0120GeV", "merges": "\u0120Ge V", "count": 1699, "type": "remove by frequency"} -{"id": 13850, "token": "\u0120\u00ce\u00b3", "merges": "\u0120\u00ce \u00b3", "count": 1699, "type": "remove by frequency"} -{"id": 15403, "token": "\u0120membranes", "merges": "\u0120membr anes", "count": 1700, "type": "remove by frequency"} -{"id": 42925, "token": "astric", "merges": "ast ric", "count": 1700, "type": "remove by frequency"} -{"id": 37776, "token": "\u0120resins", "merges": "\u0120res ins", "count": 1701, "type": "remove by frequency"} -{"id": 33403, "token": "\u00e3\u0123\u00ab\u00e3\u0123\u00aa", "merges": "\u00e3\u0123\u00ab \u00e3\u0123\u00aa", "count": 1701, "type": "remove by frequency"} -{"id": 21495, "token": "\u0120foreach", "merges": "\u0120fore ach", "count": 1702, "type": "remove by frequency"} -{"id": 41656, "token": "\u0120uncor", "merges": "\u0120unc or", "count": 1702, "type": "remove by frequency"} -{"id": 29176, "token": "\u0120Pf", "merges": "\u0120P f", "count": 1702, "type": "remove by frequency"} -{"id": 32199, "token": "\u0120bastard", "merges": "\u0120bast ard", "count": 1703, "type": "remove by frequency"} -{"id": 34290, "token": "ialysis", "merges": "ial ysis", "count": 1703, "type": "remove by frequency"} -{"id": 27200, "token": "\u0120Were", "merges": "\u0120We re", "count": 1703, "type": "remove by frequency"} -{"id": 39092, "token": "\u0120protagonist", "merges": "\u0120protagon ist", "count": 1704, "type": "remove by frequency"} -{"id": 15757, "token": "\u0120Convention", "merges": "\u0120Con vention", "count": 1704, "type": "remove by frequency"} -{"id": 48228, "token": "\u0120premiered", "merges": "\u0120premie red", "count": 1704, "type": "remove by frequency"} -{"id": 37238, "token": "\u0120obsession", "merges": "\u0120ob session", "count": 1704, "type": "remove by frequency"} -{"id": 50007, "token": "\u0120soften", "merges": "\u0120soft en", "count": 1704, "type": "remove by frequency"} -{"id": 54246, "token": "\u00e9\u0136\u00ba", "merges": "\u00e9\u0136 \u00ba", "count": 1704, "type": "remove by frequency"} -{"id": 46469, "token": "\u0120stout", "merges": "\u0120st out", "count": 1705, "type": "remove by frequency"} -{"id": 44914, "token": "\u0120pedig", "merges": "\u0120ped ig", "count": 1705, "type": "remove by frequency"} -{"id": 21138, "token": "\u0120Karl", "merges": "\u0120K arl", "count": 1705, "type": "remove by frequency"} -{"id": 35247, "token": "\u0120terrifying", "merges": "\u0120terr ifying", "count": 1706, "type": "remove by frequency"} -{"id": 34308, "token": "\u0120dissenting", "merges": "\u0120dissent ing", "count": 1706, "type": "remove by frequency"} -{"id": 37162, "token": "\u0120diameters", "merges": "\u0120di ameters", "count": 1706, "type": "remove by frequency"} -{"id": 49136, "token": "\u0120downhill", "merges": "\u0120down hill", "count": 1706, "type": "remove by frequency"} -{"id": 32621, "token": "\u0120torus", "merges": "\u0120tor us", "count": 1706, "type": "remove by frequency"} -{"id": 28657, "token": "orems", "merges": "ore ms", "count": 1706, "type": "remove by frequency"} -{"id": 47999, "token": "\u0120Ling", "merges": "\u0120L ing", "count": 1706, "type": "remove by frequency"} -{"id": 42744, "token": "\u0120(;", "merges": "\u0120( ;", "count": 1706, "type": "remove by frequency"} -{"id": 47865, "token": "\u0120sprinkle", "merges": "\u0120spr inkle", "count": 1707, "type": "remove by frequency"} -{"id": 48883, "token": "\u0120Tb", "merges": "\u0120T b", "count": 1707, "type": "remove by frequency"} -{"id": 43492, "token": "\u0120AGA", "merges": "\u0120A GA", "count": 1708, "type": "remove by frequency"} -{"id": 44636, "token": "\u0120draining", "merges": "\u0120dra ining", "count": 1710, "type": "remove by frequency"} -{"id": 16590, "token": "\u0120Disney", "merges": "\u0120Dis ney", "count": 1710, "type": "remove by frequency"} -{"id": 48258, "token": "\u0120shrew", "merges": "\u0120sh rew", "count": 1710, "type": "remove by frequency"} -{"id": 49412, "token": "\u0120NAV", "merges": "\u0120N AV", "count": 1710, "type": "remove by frequency"} -{"id": 33557, "token": "\u00c3\u00aate", "merges": "\u00c3\u00aa te", "count": 1711, "type": "remove by frequency"} -{"id": 17419, "token": "\u0120Gordon", "merges": "\u0120G ordon", "count": 1712, "type": "remove by frequency"} -{"id": 46590, "token": "\u0120nests", "merges": "\u0120n ests", "count": 1712, "type": "remove by frequency"} -{"id": 44806, "token": "\u0120Gan", "merges": "\u0120G an", "count": 1712, "type": "remove by frequency"} -{"id": 10680, "token": "\u0120inhibitor", "merges": "\u0120inhib itor", "count": 1713, "type": "remove by frequency"} -{"id": 41982, "token": "\u0120bloggers", "merges": "\u0120blog gers", "count": 1713, "type": "remove by frequency"} -{"id": 17771, "token": "\u0120Morris", "merges": "\u0120Mor ris", "count": 1713, "type": "remove by frequency"} -{"id": 38543, "token": "\u0120synergistic", "merges": "\u0120synerg istic", "count": 1714, "type": "remove by frequency"} -{"id": 49328, "token": "\u0120ensembles", "merges": "\u0120en sembles", "count": 1714, "type": "remove by frequency"} -{"id": 44999, "token": "\u0120slurry", "merges": "\u0120sl urry", "count": 1714, "type": "remove by frequency"} -{"id": 40530, "token": "\u00d1\u0139", "merges": "\u00d1 \u0139", "count": 1714, "type": "remove by frequency"} -{"id": 40943, "token": "\u0120bowls", "merges": "\u0120bow ls", "count": 1715, "type": "remove by frequency"} -{"id": 22596, "token": "\u0120opio", "merges": "\u0120op io", "count": 1715, "type": "remove by frequency"} -{"id": 29895, "token": "\u0120ASD", "merges": "\u0120A SD", "count": 1715, "type": "remove by frequency"} -{"id": 50063, "token": "\u0120admirable", "merges": "\u0120adm irable", "count": 1716, "type": "remove by frequency"} -{"id": 33871, "token": "\u0120cinnamon", "merges": "\u0120c innamon", "count": 1716, "type": "remove by frequency"} -{"id": 19006, "token": "\u0120urinary", "merges": "\u0120ur inary", "count": 1716, "type": "remove by frequency"} -{"id": 47783, "token": "\u0120Sword", "merges": "\u0120S word", "count": 1716, "type": "remove by frequency"} -{"id": 18749, "token": "\u0120Baker", "merges": "\u0120B aker", "count": 1716, "type": "remove by frequency"} -{"id": 45238, "token": "\u0120palate", "merges": "\u0120pal ate", "count": 1717, "type": "remove by frequency"} -{"id": 50228, "token": "izability", "merges": "iz ability", "count": 1718, "type": "remove by frequency"} -{"id": 42282, "token": "\u0120slips", "merges": "\u0120sl ips", "count": 1718, "type": "remove by frequency"} -{"id": 39997, "token": "chte", "merges": "ch te", "count": 1718, "type": "remove by frequency"} -{"id": 25632, "token": "\u0120Experimental", "merges": "\u0120Exper imental", "count": 1719, "type": "remove by frequency"} -{"id": 39786, "token": "\u0120agitation", "merges": "\u0120ag itation", "count": 1720, "type": "remove by frequency"} -{"id": 22231, "token": "\u0120CONDITION", "merges": "\u0120CON DITION", "count": 1721, "type": "remove by frequency"} -{"id": 41472, "token": "\u0120painters", "merges": "\u0120pain ters", "count": 1721, "type": "remove by frequency"} -{"id": 40411, "token": "orescent", "merges": "ores cent", "count": 1721, "type": "remove by frequency"} -{"id": 17405, "token": "ollary", "merges": "oll ary", "count": 1721, "type": "remove by frequency"} -{"id": 7230, "token": "\u0120Flor", "merges": "\u0120Fl or", "count": 1721, "type": "remove by frequency"} -{"id": 42279, "token": "imide", "merges": "im ide", "count": 1722, "type": "remove by frequency"} -{"id": 30827, "token": "\u0120Sett", "merges": "\u0120S ett", "count": 1723, "type": "remove by frequency"} -{"id": 32908, "token": "\u0120thermodynamic", "merges": "\u0120therm odynamic", "count": 1724, "type": "remove by frequency"} -{"id": 37175, "token": "\u0120richness", "merges": "\u0120rich ness", "count": 1724, "type": "remove by frequency"} -{"id": 27104, "token": "\u0120Darwin", "merges": "\u0120Dar win", "count": 1724, "type": "remove by frequency"} -{"id": 30759, "token": "\u0120Medal", "merges": "\u0120Med al", "count": 1724, "type": "remove by frequency"} -{"id": 19544, "token": "\u0120Cohen", "merges": "\u0120Co hen", "count": 1724, "type": "remove by frequency"} -{"id": 33428, "token": "\u0120adulthood", "merges": "\u0120adul thood", "count": 1725, "type": "remove by frequency"} -{"id": 35524, "token": "\u0120obsessed", "merges": "\u0120obs essed", "count": 1725, "type": "remove by frequency"} -{"id": 39670, "token": "\u0120oft", "merges": "\u0120of t", "count": 1725, "type": "remove by frequency"} -{"id": 36719, "token": "\u0120characterised", "merges": "\u0120character ised", "count": 1726, "type": "remove by frequency"} -{"id": 42189, "token": "\u0120intensely", "merges": "\u0120intens ely", "count": 1726, "type": "remove by frequency"} -{"id": 29254, "token": "\u0120critique", "merges": "\u0120crit ique", "count": 1726, "type": "remove by frequency"} -{"id": 16499, "token": "ylation", "merges": "yl ation", "count": 1726, "type": "remove by frequency"} -{"id": 37359, "token": "\u0120disastrous", "merges": "\u0120disast rous", "count": 1727, "type": "remove by frequency"} -{"id": 25668, "token": "\u0120anesthesia", "merges": "\u0120anest hesia", "count": 1727, "type": "remove by frequency"} -{"id": 36521, "token": "\u0120awhile", "merges": "\u0120a while", "count": 1727, "type": "remove by frequency"} -{"id": 25806, "token": "\u0120finely", "merges": "\u0120fin ely", "count": 1727, "type": "remove by frequency"} -{"id": 49779, "token": "\u0120sparing", "merges": "\u0120sp aring", "count": 1728, "type": "remove by frequency"} -{"id": 13236, "token": "\u0120teasp", "merges": "\u0120te asp", "count": 1729, "type": "remove by frequency"} -{"id": 24471, "token": "\u0120Privacy", "merges": "\u0120Priv acy", "count": 1730, "type": "remove by frequency"} -{"id": 19520, "token": "\u0120Bishop", "merges": "\u0120B ishop", "count": 1730, "type": "remove by frequency"} -{"id": 21200, "token": "olytic", "merges": "ol ytic", "count": 1730, "type": "remove by frequency"} -{"id": 30493, "token": "athione", "merges": "ath ione", "count": 1731, "type": "remove by frequency"} -{"id": 46853, "token": "\u0120exclus", "merges": "\u0120ex clus", "count": 1731, "type": "remove by frequency"} -{"id": 15562, "token": "\u0120Liber", "merges": "\u0120L iber", "count": 1731, "type": "remove by frequency"} -{"id": 43915, "token": "\u0120quir", "merges": "\u0120qu ir", "count": 1731, "type": "remove by frequency"} -{"id": 30453, "token": "\u0120drawbacks", "merges": "\u0120draw backs", "count": 1732, "type": "remove by frequency"} -{"id": 45086, "token": "\u0120thinkers", "merges": "\u0120think ers", "count": 1732, "type": "remove by frequency"} -{"id": 16542, "token": "\u0120comme", "merges": "\u0120comm e", "count": 1732, "type": "remove by frequency"} -{"id": 36069, "token": "\u0120fuer", "merges": "\u0120f uer", "count": 1732, "type": "remove by frequency"} -{"id": 33012, "token": "\u0120metaphys", "merges": "\u0120met aphys", "count": 1733, "type": "remove by frequency"} -{"id": 4535, "token": "varphi", "merges": "var phi", "count": 1733, "type": "remove by frequency"} -{"id": 23458, "token": "\u0120dilution", "merges": "\u0120dil ution", "count": 1734, "type": "remove by frequency"} -{"id": 48767, "token": "\u0120ICP", "merges": "\u0120I CP", "count": 1734, "type": "remove by frequency"} -{"id": 46401, "token": "\\+", "merges": "\\ +", "count": 1734, "type": "remove by frequency"} -{"id": 37576, "token": "\u0120assaulted", "merges": "\u0120assault ed", "count": 1735, "type": "remove by frequency"} -{"id": 49726, "token": "\u0120sails", "merges": "\u0120sa ils", "count": 1735, "type": "remove by frequency"} -{"id": 41465, "token": "\u0120contractions", "merges": "\u0120contra ctions", "count": 1736, "type": "remove by frequency"} -{"id": 30208, "token": "\u0120surgeons", "merges": "\u0120surge ons", "count": 1736, "type": "remove by frequency"} -{"id": 23842, "token": "\u0120anatom", "merges": "\u0120an atom", "count": 1736, "type": "remove by frequency"} -{"id": 43567, "token": "\u0120jaws", "merges": "\u0120j aws", "count": 1736, "type": "remove by frequency"} -{"id": 15104, "token": "\u0120