Young Ho Shin committed
Commit ea6e17a • 1 Parent(s): 616f737

Add model version 4

Files changed (2):
  1. model/config.json +168 -0
  2. model/tokenizer-wordlevel.json +352 -0
model/config.json ADDED
@@ -0,0 +1,168 @@
+ {
+   "_name_or_path": "microsoft/trocr-small-stage1",
+   "architectures": [
+     "VisionEncoderDecoderModel"
+   ],
+   "decoder": {
+     "_name_or_path": "",
+     "activation_dropout": 0.0,
+     "activation_function": "relu",
+     "add_cross_attention": true,
+     "architectures": null,
+     "attention_dropout": 0.0,
+     "bad_words_ids": null,
+     "bos_token_id": 0,
+     "chunk_size_feed_forward": 0,
+     "classifier_dropout": 0.0,
+     "cross_attention_hidden_size": 384,
+     "d_model": 256,
+     "decoder_attention_heads": 8,
+     "decoder_ffn_dim": 1024,
+     "decoder_layerdrop": 0.0,
+     "decoder_layers": 6,
+     "decoder_start_token_id": 2,
+     "diversity_penalty": 0.0,
+     "do_sample": false,
+     "dropout": 0.1,
+     "early_stopping": false,
+     "encoder_no_repeat_ngram_size": 0,
+     "eos_token_id": 2,
+     "finetuning_task": null,
+     "forced_bos_token_id": null,
+     "forced_eos_token_id": null,
+     "id2label": {
+       "0": "LABEL_0",
+       "1": "LABEL_1"
+     },
+     "init_std": 0.02,
+     "is_decoder": true,
+     "is_encoder_decoder": false,
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "layernorm_embedding": true,
+     "length_penalty": 1.0,
+     "max_length": 20,
+     "max_position_embeddings": 512,
+     "min_length": 0,
+     "model_type": "trocr",
+     "no_repeat_ngram_size": 0,
+     "num_beam_groups": 1,
+     "num_beams": 1,
+     "num_return_sequences": 1,
+     "output_attentions": false,
+     "output_hidden_states": false,
+     "output_scores": false,
+     "pad_token_id": 1,
+     "prefix": null,
+     "problem_type": null,
+     "pruned_heads": {},
+     "remove_invalid_values": false,
+     "repetition_penalty": 1.0,
+     "return_dict": true,
+     "return_dict_in_generate": false,
+     "scale_embedding": true,
+     "sep_token_id": null,
+     "task_specific_params": null,
+     "temperature": 1.0,
+     "tie_encoder_decoder": false,
+     "tie_word_embeddings": false,
+     "tokenizer_class": null,
+     "top_k": 50,
+     "top_p": 1.0,
+     "torch_dtype": null,
+     "torchscript": false,
+     "transformers_version": "4.16.2",
+     "use_bfloat16": false,
+     "use_cache": false,
+     "use_learned_position_embeddings": true,
+     "vocab_size": 64044
+   },
+   "decoder_start_token_id": 1,
+   "early_stopping": true,
+   "encoder": {
+     "_name_or_path": "",
+     "add_cross_attention": false,
+     "architectures": null,
+     "attention_probs_dropout_prob": 0.0,
+     "bad_words_ids": null,
+     "bos_token_id": null,
+     "chunk_size_feed_forward": 0,
+     "cross_attention_hidden_size": null,
+     "decoder_start_token_id": null,
+     "diversity_penalty": 0.0,
+     "do_sample": false,
+     "early_stopping": false,
+     "encoder_no_repeat_ngram_size": 0,
+     "eos_token_id": null,
+     "finetuning_task": null,
+     "forced_bos_token_id": null,
+     "forced_eos_token_id": null,
+     "hidden_act": "gelu",
+     "hidden_dropout_prob": 0.0,
+     "hidden_size": 384,
+     "id2label": {
+       "0": "LABEL_0",
+       "1": "LABEL_1"
+     },
+     "image_size": 384,
+     "initializer_range": 0.02,
+     "intermediate_size": 1536,
+     "is_decoder": false,
+     "is_encoder_decoder": false,
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "layer_norm_eps": 1e-12,
+     "length_penalty": 1.0,
+     "max_length": 20,
+     "min_length": 0,
+     "model_type": "deit",
+     "no_repeat_ngram_size": 0,
+     "num_attention_heads": 6,
+     "num_beam_groups": 1,
+     "num_beams": 1,
+     "num_channels": 3,
+     "num_hidden_layers": 12,
+     "num_return_sequences": 1,
+     "output_attentions": false,
+     "output_hidden_states": false,
+     "output_scores": false,
+     "pad_token_id": null,
+     "patch_size": 16,
+     "prefix": null,
+     "problem_type": null,
+     "pruned_heads": {},
+     "qkv_bias": true,
+     "remove_invalid_values": false,
+     "repetition_penalty": 1.0,
+     "return_dict": true,
+     "return_dict_in_generate": false,
+     "sep_token_id": null,
+     "task_specific_params": null,
+     "temperature": 1.0,
+     "tie_encoder_decoder": false,
+     "tie_word_embeddings": true,
+     "tokenizer_class": null,
+     "top_k": 50,
+     "top_p": 1.0,
+     "torch_dtype": null,
+     "torchscript": false,
+     "transformers_version": "4.16.2",
+     "use_bfloat16": false
+   },
+   "eos_token_id": 2,
+   "is_encoder_decoder": true,
+   "length_penalty": 2.0,
+   "max_length": 100,
+   "model_type": "vision-encoder-decoder",
+   "no_repeat_ngram_size": 3,
+   "num_beams": 4,
+   "pad_token_id": 3,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float32",
+   "transformers_version": null,
+   "vocab_size": 200
+ }
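
This config wires a DeiT image encoder (hidden size 384, 384×384 input, 16×16 patches) to a small TrOCR text decoder (6 layers, d_model 256), with beam-search generation defaults (4 beams, max length 100, no-repeat n-gram size 3) set at the VisionEncoderDecoder level. A minimal sketch of loading it with `transformers`, assuming the file sits in a local `model/` directory matching this commit's layout (the path and the random-weight instantiation are illustrative, not something the commit itself prescribes):

```python
from transformers import VisionEncoderDecoderConfig, VisionEncoderDecoderModel

# Read config.json from the local directory added in this commit (path assumed).
config = VisionEncoderDecoderConfig.from_pretrained("./model")

print(config.encoder.model_type)             # "deit"  -- 384x384 images, 16x16 patches
print(config.decoder.model_type)             # "trocr" -- 6 decoder layers, d_model 256
print(config.num_beams, config.max_length)   # 4 100   -- generation defaults from the top level

# Building the model from the config alone gives randomly initialised weights;
# trained weights would have to be loaded from the checkpoint files in the repo.
model = VisionEncoderDecoderModel(config=config)
```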
model/tokenizer-wordlevel.json ADDED
@@ -0,0 +1,352 @@
+ {
+   "version": "1.0",
+   "truncation": {
+     "direction": "Right",
+     "max_length": 100,
+     "strategy": "LongestFirst",
+     "stride": 0
+   },
+   "padding": {
+     "strategy": {
+       "Fixed": 100
+     },
+     "direction": "Right",
+     "pad_to_multiple_of": null,
+     "pad_id": 0,
+     "pad_type_id": 0,
+     "pad_token": "[PAD]"
+   },
+   "added_tokens": [
+     {
+       "id": 0,
+       "content": "[UNK]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 1,
+       "content": "[CLS]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 2,
+       "content": "[SEP]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 3,
+       "content": "[PAD]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 4,
+       "content": "[MASK]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     }
+   ],
+   "normalizer": null,
+   "pre_tokenizer": {
+     "type": "Whitespace"
+   },
+   "post_processor": {
+     "type": "TemplateProcessing",
+     "single": [
+       {
+         "SpecialToken": {
+           "id": "[CLS]",
+           "type_id": 0
+         }
+       },
+       {
+         "Sequence": {
+           "id": "A",
+           "type_id": 0
+         }
+       },
+       {
+         "SpecialToken": {
+           "id": "[SEP]",
+           "type_id": 0
+         }
+       }
+     ],
+     "pair": [
+       {
+         "SpecialToken": {
+           "id": "[CLS]",
+           "type_id": 0
+         }
+       },
+       {
+         "Sequence": {
+           "id": "A",
+           "type_id": 0
+         }
+       },
+       {
+         "SpecialToken": {
+           "id": "[SEP]",
+           "type_id": 0
+         }
+       },
+       {
+         "Sequence": {
+           "id": "B",
+           "type_id": 1
+         }
+       },
+       {
+         "SpecialToken": {
+           "id": "[SEP]",
+           "type_id": 1
+         }
+       }
+     ],
+     "special_tokens": {
+       "[CLS]": {
+         "id": "[CLS]",
+         "ids": [
+           1
+         ],
+         "tokens": [
+           "[CLS]"
+         ]
+       },
+       "[SEP]": {
+         "id": "[SEP]",
+         "ids": [
+           2
+         ],
+         "tokens": [
+           "[SEP]"
+         ]
+       }
+     }
+   },
+   "decoder": null,
+   "model": {
+     "type": "WordLevel",
+     "vocab": {
+       "[UNK]": 0,
+       "[CLS]": 1,
+       "[SEP]": 2,
+       "[PAD]": 3,
+       "[MASK]": 4,
+       "}": 5,
+       "{": 6,
+       "\\": 7,
+       "_": 8,
+       "^": 9,
+       "(": 10,
+       ")": 11,
+       "2": 12,
+       "1": 13,
+       "-": 14,
+       "=": 15,
+       ",": 16,
+       "+": 17,
+       "frac": 18,
+       "i": 19,
+       "0": 20,
+       "x": 21,
+       "n": 22,
+       ".": 23,
+       "d": 24,
+       "\\,": 25,
+       "a": 26,
+       "mu": 27,
+       "left": 28,
+       "right": 29,
+       "e": 30,
+       "k": 31,
+       "c": 32,
+       "m": 33,
+       "r": 34,
+       "p": 35,
+       "3": 36,
+       "alpha": 37,
+       "t": 38,
+       "partial": 39,
+       "~": 40,
+       "l": 41,
+       "A": 42,
+       "s": 43,
+       "&": 44,
+       "4": 45,
+       "j": 46,
+       "\\;": 47,
+       "g": 48,
+       "prime": 49,
+       "]": 50,
+       "[": 51,
+       "nu": 52,
+       "z": 53,
+       "pi": 54,
+       "|": 55,
+       "b": 56,
+       "phi": 57,
+       "\\\\": 58,
+       "mathrm": 59,
+       "q": 60,
+       "operatorname": 61,
+       "cal": 62,
+       "N": 63,
+       "delta": 64,
+       "f": 65,
+       "lambda": 66,
+       "beta": 67,
+       "bar": 68,
+       "T": 69,
+       "int": 70,
+       "array": 71,
+       "R": 72,
+       "S": 73,
+       "D": 74,
+       "L": 75,
+       "M": 76,
+       "B": 77,
+       "y": 78,
+       "sigma": 79,
+       "F": 80,
+       "theta": 81,
+       "/": 82,
+       "gamma": 83,
+       "h": 84,
+       "hat": 85,
+       "psi": 86,
+       "sqrt": 87,
+       "sum": 88,
+       "u": 89,
+       "H": 90,
+       "o": 91,
+       "rho": 92,
+       "tilde": 93,
+       "tau": 94,
+       "C": 95,
+       "P": 96,
+       "G": 97,
+       "V": 98,
+       "I": 99,
+       "X": 100,
+       "omega": 101,
+       "epsilon": 102,
+       "E": 103,
+       "J": 104,
+       "bf": 105,
+       "eta": 106,
+       "v": 107,
+       "xi": 108,
+       "Q": 109,
+       "Phi": 110,
+       "quad": 111,
+       "*": 112,
+       "5": 113,
+       "\\{": 114,
+       "vec": 115,
+       "begin": 116,
+       "end": 117,
+       "Gamma": 118,
+       "K": 119,
+       "infty": 120,
+       "\\}": 121,
+       "6": 122,
+       "U": 123,
+       "rangle": 124,
+       "dot": 125,
+       "W": 126,
+       "pm": 127,
+       "Lambda": 128,
+       "Z": 129,
+       "varphi": 130,
+       "Delta": 131,
+       "w": 132,
+       "chi": 133,
+       ";": 134,
+       "8": 135,
+       "\\!": 136,
+       "Omega": 137,
+       "kappa": 138,
+       "qquad": 139,
+       "cdot": 140,
+       "Psi": 141,
+       "equiv": 142,
+       "langle": 143,
+       "overline": 144,
+       ">": 145,
+       "<": 146,
+       "dagger": 147,
+       "zeta": 148,
+       "varepsilon": 149,
+       "cdots": 150,
+       "rightarrow": 151,
+       "O": 152,
+       "nabla": 153,
+       "Y": 154,
+       "ldots": 155,
+       ":": 156,
+       "Sigma": 157,
+       "ell": 158,
+       "7": 159,
+       "mathcal": 160,
+       "\\:": 161,
+       "!": 162,
+       "otimes": 163,
+       "prod": 164,
+       "wedge": 165,
+       "9": 166,
+       "hspace": 167,
+       "Pi": 168,
+       "hbar": 169,
+       "sim": 170,
+       "vert": 171,
+       "in": 172,
+       "Big": 173,
+       "widetilde": 174,
+       "displaystyle": 175,
+       "times": 176,
+       "Theta": 177,
+       "underline": 178,
+       "mid": 179,
+       "to": 180,
+       "dots": 181,
+       "mathbf": 182,
+       "ast": 183,
+       "leq": 184,
+       "approx": 185,
+       "star": 186,
+       "stackrel": 187,
+       "perp": 188,
+       "widehat": 189,
+       "big": 190,
+       "vartheta": 191,
+       "'": 192,
+       "Bigr": 193,
+       "geq": 194,
+       "mp": 195,
+       "Bigl": 196,
+       "dag": 197,
+       "neq": 198,
+       "simeq": 199
+     },
+     "unk_token": "[UNK]"
+   }
+ }
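
This tokenizer is a whitespace-split WordLevel model over 200 LaTeX tokens (commands such as `frac` and `alpha`, single characters, and the `[UNK]`/`[CLS]`/`[SEP]`/`[PAD]`/`[MASK]` specials), with a `[CLS] … [SEP]` post-processing template, truncation at 100 tokens, and fixed-length padding to 100. A minimal usage sketch with the `tokenizers` library, assuming the file path from this commit and a whitespace-separated LaTeX string as input (the sample string and expected output are illustrative):

```python
from tokenizers import Tokenizer

# Load the word-level tokenizer file added in this commit (path assumed from the repo layout).
tok = Tokenizer.from_file("model/tokenizer-wordlevel.json")

# The Whitespace pre-tokenizer splits word runs and punctuation runs, so commands
# like "frac", braces, and "^" each become separate tokens; the post-processor then
# wraps the sequence in [CLS] ... [SEP].
enc = tok.encode("\\frac { 1 } { 2 } m v ^ { 2 }")

print(enc.tokens[:12])  # expected: ['[CLS]', '\\', 'frac', '{', '1', '}', '{', '2', '}', 'm', 'v', '^']
print(len(enc.ids))     # 100 -- padded/truncated to the fixed length configured above
```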