lgrobol committed
Commit 202bce1
1 Parent(s): 41d0a9a
README.md ADDED
@@ -0,0 +1,15 @@
+ ---
+ language: multilingual
+ tags:
+ - electra
+ - testing
+ - minuscule
+ license: "cc0-1.0"
+ ---
+
+ ELECTRA-minuscule-generator
+ ===============================
+
+ A ridiculously small ELECTRA generator model for testing purposes.
+
+ **THIS MODEL HAS NOT BEEN TRAINED, DO NOT EXPECT ANYTHING OF IT.**
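A minimal usage sketch for the model card above. The repo id `lgrobol/electra-minuscule-generator` is an assumption inferred from the author and model name shown in this commit; since the model is untrained, the output is only useful for plumbing tests.

```python
from transformers import AutoModelForMaskedLM, AutoTokenizer

repo_id = "lgrobol/electra-minuscule-generator"  # assumed repo id, not confirmed by the commit

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForMaskedLM.from_pretrained(repo_id)

inputs = tokenizer("le <mask> chante", return_tensors="pt")
logits = model(**inputs).logits
print(logits.shape)  # (1, sequence_length, 256): one score per entry of the tiny vocabulary
```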
config.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "_name_or_path": ".",
+ "architectures": [
+ "ElectraForMaskedLM"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "embedding_size": 32,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 32,
+ "initializer_range": 0.02,
+ "intermediate_size": 128,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "electra",
+ "num_attention_heads": 2,
+ "num_hidden_layers": 2,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "summary_activation": "gelu",
+ "summary_last_dropout": 0.1,
+ "summary_type": "first",
+ "summary_use_proj": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.15.0",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 256
+ }
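As a rough sanity check of how small this configuration is, the sketch below rebuilds it with `ElectraConfig` and counts parameters. The values are copied from `config.json`; the printed count is an estimate consistent with the ~226 KB float32 checkpoint added below.

```python
# Rebuild the architecture from the values in config.json and count its parameters.
from transformers import ElectraConfig, ElectraForMaskedLM

config = ElectraConfig(
    embedding_size=32,
    hidden_size=32,
    intermediate_size=128,
    num_attention_heads=2,
    num_hidden_layers=2,
    max_position_embeddings=512,
    type_vocab_size=2,
    vocab_size=256,
)
model = ElectraForMaskedLM(config)
n_params = sum(p.numel() for p in model.parameters())
print(n_params)  # on the order of 5e4 parameters, i.e. roughly 200 KB in float32
```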
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a763bbafe33398915c006e5d141b4532a9b6e1804bb74ccb60309bb6403c417b
+ size 225892
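The file above is a Git LFS pointer: `oid` is the SHA-256 of the actual checkpoint and `size` is its length in bytes. A small sketch for checking a locally resolved copy against the pointer (the local path is an assumption):

```python
# Verify a locally downloaded pytorch_model.bin against the LFS pointer above.
import hashlib

path = "pytorch_model.bin"  # assumed local path to the resolved (non-pointer) file
with open(path, "rb") as f:
    data = f.read()

print(len(data))                         # should be 225892
print(hashlib.sha256(data).hexdigest())  # should match the sha256 oid above
```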
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
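The map above declares RoBERTa-style special tokens (`<s>` doubles as BOS and CLS, `</s>` as EOS and SEP). A brief sketch of how they surface on a loaded tokenizer, under the same assumed repo id as before:

```python
# Inspect the special tokens declared in special_tokens_map.json.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("lgrobol/electra-minuscule-generator")  # assumed repo id
print(tokenizer.cls_token, tokenizer.sep_token, tokenizer.pad_token, tokenizer.mask_token)
# expected: <s> </s> <pad> <mask>
```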
tokenizer.json ADDED
@@ -0,0 +1 @@
+ {"version":"1.0","truncation":{"max_length":510,"strategy":"LongestFirst","stride":0},"padding":null,"added_tokens":[{"id":0,"special":true,"content":"<s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"<pad>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"</s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":3,"special":true,"content":"<unk>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":4,"special":true,"content":"<mask>","single_word":false,"lstrip":true,"rstrip":false,"normalized":true}],"normalizer":null,"pre_tokenizer":{"type":"ByteLevel","add_prefix_space":false,"trim_offsets":true},"post_processor":{"type":"RobertaProcessing","sep":["</s>",2],"cls":["<s>",0],"trim_offsets":true,"add_prefix_space":true},"decoder":null,"model":{"type":"BPE","dropout":null,"unk_token":"<unk>","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"vocab":{"<s>":0,"<pad>":1,"</s>":2,"<unk>":3,"<mask>":4,"!":5,"(":6,")":7,"*":8,",":9,"-":10,".":11,"/":12,"0":13,"1":14,"2":15,"3":16,"4":17,"5":18,"6":19,"7":20,"8":21,":":22,";":23,"A":24,"B":25,"C":26,"D":27,"E":28,"F":29,"G":30,"H":31,"I":32,"J":33,"L":34,"M":35,"N":36,"O":37,"P":38,"Q":39,"R":40,"S":41,"T":42,"U":43,"V":44,"W":45,"Y":46,"[":47,"]":48,"_":49,"a":50,"b":51,"c":52,"d":53,"e":54,"f":55,"g":56,"h":57,"i":58,"j":59,"k":60,"l":61,"m":62,"n":63,"o":64,"p":65,"q":66,"r":67,"s":68,"t":69,"u":70,"v":71,"w":72,"x":73,"y":74,"z":75,"¦":76,"§":77,"¨":78,"©":79,"ª":80,"«":81,"®":82,"¯":83,"´":84,"¶":85,"»":86,"Â":87,"Ã":88,"â":89,"Ċ":90,"Ġ":91,"Ģ":92,"Ĩ":93,"ī":94,"ij":95,"Ķ":96,"Ļ":97,"ł":98,"de":99,"Ġt":100,"Ġl":101,"ou":102,"Ġe":103,"Ġp":104,"Ġm":105,"Ġc":106,"Ġde":107,"âĢ":108,"âĢĻ":109,"nt":110,"Ġs":111,"re":112,"qu":113,"é":114,"es":115,"on":116,"er":117,"ai":118,"Ġet":119,"Ġmo":120,"Ġqu":121,"eu":122,"Ġa":123,"Ġd":124,"Ġpa":125,"me":126,"Ġmode":127,"Ġn":128,"Ġle":129,"Ġtr":130,"le":131,"Ġla":132,"our":133,"in":134,"om":135,"Ġque":136,"ie":137,"ant":138,"Ġtu":139,"Ġou":140,"an":141,"us":142,"Ġen":143,"eur":144,"is":145,"or":146,"Ġv":147,"Ġpl":148,"Ãł":149,"un":150,"os":151,"il":152,"ais":153,"Ġé":154,"au":155,"it":156,"Ġf":157,"Ġse":158,"Ġcom":159,"Ġch":160,"ch":161,"ĠÃł":162,"Ġtou":163,"Ġles":164,"Ġpar":165,"um":166,"Ġj":167,"te":168,"ur":169,"onn":170,"Ġmin":171,"Ġdes":172,"st":173,"Ġ;":174,"ien":175,"ti":176,"Ġpas":177,"en":178,"Ġton":179,"Ġét":180,"Ġcomme":181,"Ġmineur":182,"ium":183,"Ġpour":184,"Ġplus":185,"tre":186,"Ġb":187,"ent":188,"Ġre":189,"ce":190,"Ġg":191,"as":192,"eux":193,"ĠL":194,"ans":195,"è":196,"ain":197,"air":198,"Ġne":199,"ire":200,"pp":201,"que":202,"Ġac":203,"ours":204,"Ġy":205,"Ġma":206,"Ġtrou":207,"Ġtris":208,"Ġchant":209,"ĠM":210,"Ġce":211,"ons":212,"ar":213,"ver":214,"Ġest":215,"lle":216,"Ġo":217,"hor":218,"Ġqui":219,"Ġsou":220,"ment":221,"uran":222,"dre":223,"Ġni":224,"ag":225,"tes":226,"Ġcor":227,"és":228,"ins":229,"Ġver":230,"ois":231,"ux":232,"vant":233,"ys":234,"ĠV":235,"Ġsi":236,"erch":237,"Ġmoins":238,"iel":239,"Ġtout":240,"iè":241,"jours":242,"ne":243,"Ġent":244,"Ġcl":245,"Ġcar":246,"Ġdeux":247,"Ġson":248,"Ġdans":249,"Ġcherch":250,"Ġtoujours":251,"Ġbien":252,"ĠLa":253,"Ġacti":254,"elle":255},"merges":["d e","Ġ t","Ġ l","o u","Ġ e","Ġ p","Ġ m","Ġ c","Ġ de","â Ģ","âĢ Ļ","n t","Ġ s","r e","q u","à ©","e s","o n","e r","a i","Ġe t","Ġm o","Ġ qu","e u","Ġ a","Ġ d","Ġp a","m e","Ġmo de","Ġ n","Ġl e","Ġt r","l e","Ġl a","ou r","i n","o m","Ġqu e","i e","a nt","Ġt u","Ġ ou","a n","u s","Ġe n","eu r","i s","o r","Ġ v","Ġp l","à ł","u n","o s","i l","ai s","Ġ é","a u","i t","Ġ f","Ġs e","Ġc om","Ġc h","c h","Ġ Ãł","Ġt ou","Ġl es","Ġpa r","u m","Ġ j","t e","u r","on n","Ġm in","Ġde s","s t","Ġ ;","ie n","t i","Ġpa s","e n","Ġt on","Ġé t","Ġcom me","Ġmin eur","i um","Ġp our","Ġpl us","t re","Ġ b","e nt","Ġ re","c e","Ġ g","a s","eu x","Ġ L","an s","à ¨","ai n","ai r","Ġn e","i re","p p","qu e","Ġa c","our s","Ġ y","Ġm a","Ġtr ou","Ġtr is","Ġch ant","Ġ M","Ġc e","on s","a r","v er","Ġe st","l le","Ġ o","h or","Ġqu i","Ġs ou","me nt","ur an","d re","Ġn i","a g","t es","Ġc or","é s","in s","Ġv er","o is","u x","v ant","y s","Ġ V","Ġs i","er ch","Ġmo ins","ie l","Ġtou t","i è","j ours","n e","Ġe nt","Ġc l","Ġc ar","Ġde ux","Ġs on","Ġd ans","Ġch erch","Ġtou jours","Ġb ien","ĠL a","Ġac ti","e lle"]}}
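The file above defines a byte-level BPE model with a 256-entry vocabulary, truncation at 510 tokens, and a `RobertaProcessing` post-processor that wraps every encoding in `<s> … </s>`. A sketch using the standalone `tokenizers` library on a local copy of the file (path assumed):

```python
# Load the byte-level BPE tokenizer defined above and encode a short string.
from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")  # assumed local copy of the file above
enc = tok.encode("le mineur chante")
print(enc.tokens)  # RobertaProcessing adds <s> ... </s> around the BPE pieces
print(enc.ids)     # ids drawn from the 256-entry vocabulary
```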
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "max_len": 512, "special_tokens_map_file": "local/tokenizer/roberta-tiny/special_tokens_map.json", "name_or_path": "local/tokenizer/roberta-tiny", "tokenizer_class": "RobertaTokenizer"}
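Since the config above declares a `RobertaTokenizer` with `max_len` 512, the whole repo can be exercised end to end through a fill-mask pipeline. The repo id is assumed as before, and because the model is untrained the predictions are meaningless by design:

```python
# End-to-end smoke test through the fill-mask pipeline (untrained model: garbage output expected).
from transformers import pipeline

fill_mask = pipeline("fill-mask", model="lgrobol/electra-minuscule-generator")  # assumed repo id
for prediction in fill_mask("le <mask> chante")[:3]:
    print(prediction["token_str"], prediction["score"])
```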