lgrobol committed
Commit 202bce1
1 Parent(s): 41d0a9a
README.md ADDED
@@ -0,0 +1,15 @@
+ ---
+ language: multilingual
+ tags:
+ - electra
+ - testing
+ - minuscule
+ license: "cc0-1.0"
+ ---
+
+ ELECTRA-minuscule-generator
+ ===============================
+
+ A ridiculously small ELECTRA generator model for testing purposes.
+
+ **THIS MODEL HAS NOT BEEN TRAINED, DO NOT EXPECT ANYTHING OF IT.**
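A minimal usage sketch for the model card above. The repo id `lgrobol/electra-minuscule-generator` is an assumption inferred from the author and model name shown in this commit; since the model is untrained, the output is only useful for plumbing tests.

```python
from transformers import AutoModelForMaskedLM, AutoTokenizer

repo_id = "lgrobol/electra-minuscule-generator"  # assumed repo id, not confirmed by the commit

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForMaskedLM.from_pretrained(repo_id)

inputs = tokenizer("le <mask> chante", return_tensors="pt")
logits = model(**inputs).logits
print(logits.shape)  # (1, sequence_length, 256): one score per entry of the tiny vocabulary
```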
config.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "_name_or_path": ".",
+ "architectures": [
+ "ElectraForMaskedLM"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "embedding_size": 32,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 32,
+ "initializer_range": 0.02,
+ "intermediate_size": 128,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "electra",
+ "num_attention_heads": 2,
+ "num_hidden_layers": 2,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "summary_activation": "gelu",
+ "summary_last_dropout": 0.1,
+ "summary_type": "first",
+ "summary_use_proj": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.15.0",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 256
+ }
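As a rough sanity check of how small this configuration is, the sketch below rebuilds it with `ElectraConfig` and counts parameters. The values are copied from `config.json`; the printed count is an estimate consistent with the ~226 KB float32 checkpoint added below.

```python
# Rebuild the architecture from the values in config.json and count its parameters.
from transformers import ElectraConfig, ElectraForMaskedLM

config = ElectraConfig(
    embedding_size=32,
    hidden_size=32,
    intermediate_size=128,
    num_attention_heads=2,
    num_hidden_layers=2,
    max_position_embeddings=512,
    type_vocab_size=2,
    vocab_size=256,
)
model = ElectraForMaskedLM(config)
n_params = sum(p.numel() for p in model.parameters())
print(n_params)  # on the order of 5e4 parameters, i.e. roughly 200 KB in float32
```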
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a763bbafe33398915c006e5d141b4532a9b6e1804bb74ccb60309bb6403c417b
+ size 225892
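The file above is a Git LFS pointer: `oid` is the SHA-256 of the actual checkpoint and `size` is its length in bytes. A small sketch for checking a locally resolved copy against the pointer (the local path is an assumption):

```python
# Verify a locally downloaded pytorch_model.bin against the LFS pointer above.
import hashlib

path = "pytorch_model.bin"  # assumed local path to the resolved (non-pointer) file
with open(path, "rb") as f:
    data = f.read()

print(len(data))                         # should be 225892
print(hashlib.sha256(data).hexdigest())  # should match the sha256 oid above
```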
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
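The map above declares RoBERTa-style special tokens (`<s>` doubles as BOS and CLS, `</s>` as EOS and SEP). A brief sketch of how they surface on a loaded tokenizer, under the same assumed repo id as before:

```python
# Inspect the special tokens declared in special_tokens_map.json.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("lgrobol/electra-minuscule-generator")  # assumed repo id
print(tokenizer.cls_token, tokenizer.sep_token, tokenizer.pad_token, tokenizer.mask_token)
# expected: <s> </s> <pad> <mask>
```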
tokenizer.json ADDED
@@ -0,0 +1 @@
+ {"version":"1.0","truncation":{"max_length":510,"strategy":"LongestFirst","stride":0},"padding":null,"added_tokens":[{"id":0,"special":true,"content":"<s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"<pad>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"</s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":3,"special":true,"content":"<unk>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":4,"special":true,"content":"<mask>","single_word":false,"lstrip":true,"rstrip":false,"normalized":true}],"normalizer":null,"pre_tokenizer":{"type":"ByteLevel","add_prefix_space":false,"trim_offsets":true},"post_processor":{"type":"RobertaProcessing","sep":["</s>",2],"cls":["<s>",0],"trim_offsets":true,"add_prefix_space":true},"decoder":null,"model":{"type":"BPE","dropout":null,"unk_token":"<unk>","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"vocab":{"<s>":0,"<pad>":1,"</s>":2,"<unk>":3,"<mask>":4,"!":5,"(":6,")":7,"*":8,",":9,"-":10,".":11,"/":12,"0":13,"1":14,"2":15,"3":16,"4":17,"5":18,"6":19,"7":20,"8":21,":":22,";":23,"A":24,"B":25,"C":26,"D":27,"E":28,"F":29,"G":30,"H":31,"I":32,"J":33,"L":34,"M":35,"N":36,"O":37,"P":38,"Q":39,"R":40,"S":41,"T":42,"U":43,"V":44,"W":45,"Y":46,"[":47,"]":48,"_":49,"a":50,"b":51,"c":52,"d":53,"e":54,"f":55,"g":56,"h":57,"i":58,"j":59,"k":60,"l":61,"m":62,"n":63,"o":64,"p":65,"q":66,"r":67,"s":68,"t":69,"u":70,"v":71,"w":72,"x":73,"y":74,"z":75,"¦":76,"§":77,"¨":78,"©":79,"ª":80,"«":81,"®":82,"¯":83,"´":84,"¶":85,"»":86,"Â":87,"Ã":88,"â":89,"Ċ":90,"Ġ":91,"Ģ":92,"Ĩ":93,"ī":94,"ij":95,"Ķ":96,"Ļ":97,"ł":98,"de":99,"Ġt":100,"Ġl":101,"ou":102,"Ġe":103,"Ġp":104,"Ġm":105,"Ġc":106,"Ġde":107,"âĢ":108,"âĢĻ":109,"nt":110,"Ġs":111,"re":112,"qu":113,"é":114,"es":115,"on":116,"er":117,"ai":118,"Ġet":119,"Ġmo":120,"Ġqu":121,"eu":122,"Ġa":123,"Ġd":124,"Ġpa":125,"me":126,"Ġmode":127,"Ġn":128,"Ġle":129,"Ġtr":130,"le":131,"Ġla":132,"our":133,"in":134,"om":135,"Ġque":136,"ie":137,"ant":138,"Ġtu":139,"Ġou":140,"an":141,"us":142,"Ġen":143,"eur":144,"is":145,"or":146,"Ġv":147,"Ġpl":148,"Ãł":149,"un":150,"os":151,"il":152,"ais":153,"Ġé":154,"au":155,"it":156,"Ġf":157,"Ġse":158,"Ġcom":159,"Ġch":160,"ch":161,"ĠÃł":162,"Ġtou":163,"Ġles":164,"Ġpar":165,"um":166,"Ġj":167,"te":168,"ur":169,"onn":170,"Ġmin":171,"Ġdes":172,"st":173,"Ġ;":174,"ien":175,"ti":176,"Ġpas":177,"en":178,"Ġton":179,"Ġét":180,"Ġcomme":181,"Ġmineur":182,"ium":183,"Ġpour":184,"Ġplus":185,"tre":186,"Ġb":187,"ent":188,"Ġre":189,"ce":190,"Ġg":191,"as":192,"eux":193,"ĠL":194,"ans":195,"è":196,"ain":197,"air":198,"Ġne":199,"ire":200,"pp":201,"que":202,"Ġac":203,"ours":204,"Ġy":205,"Ġma":206,"Ġtrou":207,"Ġtris":208,"Ġchant":209,"ĠM":210,"Ġce":211,"ons":212,"ar":213,"ver":214,"Ġest":215,"lle":216,"Ġo":217,"hor":218,"Ġqui":219,"Ġsou":220,"ment":221,"uran":222,"dre":223,"Ġni":224,"ag":225,"tes":226,"Ġcor":227,"és":228,"ins":229,"Ġver":230,"ois":231,"ux":232,"vant":233,"ys":234,"ĠV":235,"Ġsi":236,"erch":237,"Ġmoins":238,"iel":239,"Ġtout":240,"iè":241,"jours":242,"ne":243,"Ġent":244,"Ġcl":245,"Ġcar":246,"Ġdeux":247,"Ġson":248,"Ġdans":249,"Ġcherch":250,"Ġtoujours":251,"Ġbien":252,"ĠLa":253,"Ġacti":254,"elle":255},"merges":["d e","Ġ t","Ġ l","o u","Ġ e","Ġ p","Ġ m","Ġ c","Ġ de","â Ģ","âĢ Ļ","n t","Ġ s","r e","q u","à ©","e s","o n","e r","a i","Ġe t","Ġm o","Ġ qu","e u","Ġ a","Ġ d","Ġp a","m e","Ġmo de","Ġ n","Ġl e","Ġt r","l e","Ġl a","ou r","i n","o m","Ġqu e","i e","a nt","Ġt u","Ġ ou","a n","u s","Ġe n","eu r","i s","o r","Ġ v","Ġp l","à ł","u n","o s","i l","ai s","Ġ é","a u","i t","Ġ f","Ġs e","Ġc om","Ġc h","c h","Ġ Ãł","Ġt ou","Ġl es","Ġpa r","u m","Ġ j","t e","u r","on n","Ġm in","Ġde s","s t","Ġ ;","ie n","t i","Ġpa s","e n","Ġt on","Ġé t","Ġcom me","Ġmin eur","i um","Ġp our","Ġpl us","t re","Ġ b","e nt","Ġ re","c e","Ġ g","a s","eu x","Ġ L","an s","à ¨","ai n","ai r","Ġn e","i re","p p","qu e","Ġa c","our s","Ġ y","Ġm a","Ġtr ou","Ġtr is","Ġch ant","Ġ M","Ġc e","on s","a r","v er","Ġe st","l le","Ġ o","h or","Ġqu i","Ġs ou","me nt","ur an","d re","Ġn i","a g","t es","Ġc or","é s","in s","Ġv er","o is","u x","v ant","y s","Ġ V","Ġs i","er ch","Ġmo ins","ie l","Ġtou t","i è","j ours","n e","Ġe nt","Ġc l","Ġc ar","Ġde ux","Ġs on","Ġd ans","Ġch erch","Ġtou jours","Ġb ien","ĠL a","Ġac ti","e lle"]}}
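The file above defines a byte-level BPE model with a 256-entry vocabulary, truncation at 510 tokens, and a `RobertaProcessing` post-processor that wraps every encoding in `<s> … </s>`. A sketch using the standalone `tokenizers` library on a local copy of the file (path assumed):

```python
# Load the byte-level BPE tokenizer defined above and encode a short string.
from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")  # assumed local copy of the file above
enc = tok.encode("le mineur chante")
print(enc.tokens)  # RobertaProcessing adds <s> ... </s> around the BPE pieces
print(enc.ids)     # ids drawn from the 256-entry vocabulary
```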
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "max_len": 512, "special_tokens_map_file": "local/tokenizer/roberta-tiny/special_tokens_map.json", "name_or_path": "local/tokenizer/roberta-tiny", "tokenizer_class": "RobertaTokenizer"}
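Since the config above declares a `RobertaTokenizer` with `max_len` 512, the whole repo can be exercised end to end through a fill-mask pipeline. The repo id is assumed as before, and because the model is untrained the predictions are meaningless by design:

```python
# End-to-end smoke test through the fill-mask pipeline (untrained model: garbage output expected).
from transformers import pipeline

fill_mask = pipeline("fill-mask", model="lgrobol/electra-minuscule-generator")  # assumed repo id
for prediction in fill_mask("le <mask> chante")[:3]:
    print(prediction["token_str"], prediction["score"])
```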