Huertas97 committed on
Commit
9a78c99
1 Parent(s): 42c06bd

Update spaCy pipeline

Browse files
.gitattributes CHANGED
@@ -32,3 +32,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ xx_LeetSpeakNER_mstsb_mpnet-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
36
+ vocab/strings.json filter=lfs diff=lfs merge=lfs -text
37
+ transformer/model filter=lfs diff=lfs merge=lfs -text
.ipynb_checkpoints/XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_test_IT-checkpoint.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Model,Dataset,lang,P-INV_CAM,R-INV_CAM,F1-INV_CAM,TP-INV_CAM,FN-INV_CAM,FP-INV_CAM,P-LEETSPEAK,R-LEETSPEAK,F1-LEETSPEAK,TP-LEETSPEAK,FN-LEETSPEAK,FP-LEETSPEAK,P-MIX,R-MIX,F1-MIX,TP-MIX,FN-MIX,FP-MIX,P-O,R-O,F1-O,TP-O,FN-O,FP-O,P-PUNCT_CAM,R-PUNCT_CAM,F1-PUNCT_CAM,TP-PUNCT_CAM,FN-PUNCT_CAM,FP-PUNCT_CAM,F1-Macro,F1-Micro,F1-Weighted
2
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,overall,IT,0.9111816019032514,0.9054373522458629,0.908300395256917,1149,112,120,0.9903846153846154,0.976303317535545,0.9832935560859187,6798,66,165,0.9581081081081081,0.8872203973095574,0.921302688215707,5672,248,721,0.9981698947689492,0.9982269841572455,0.9981984386468272,104720,192,186,0.8543046357615894,0.9694692343823391,0.9082508250825083,4128,704,130,0.9438691806575757,0.9893205373660018,0.8912935031760101
3
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,WikiMatrix,IT,0.9237057220708447,0.9416666666666667,0.9325997248968363,339,28,21,0.987865416436845,0.9781540141998908,0.9829857299670692,1791,22,40,0.9668874172185431,0.8728260869565218,0.9174521565267066,1606,55,234,0.9986455632521961,0.9978732454274777,0.9982592549611233,25806,35,55,0.8390966831333804,0.9850869925434963,0.90625,1189,228,18,0.947509373270347,0.9881668220843114,0.8953833601703967
4
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,TED2020,IT,0.9244791666666666,0.922077922077922,0.9232769830949284,355,29,30,0.9933469805527124,0.9798081776880363,0.986531130876747,1941,13,40,0.9367396593673966,0.9005847953216374,0.9183064997018485,1540,104,170,0.998563074352548,0.9984963410966685,0.998529706609637,29882,43,45,0.8759177679882526,0.9423380726698263,0.9079147640791476,1193,169,73,0.9469118168724618,0.9898494428534974,0.8972171482155227
5
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_ParaCrawl,IT,0.8919449901768173,0.8680688336520076,0.87984496124031,454,55,69,0.9897622192866579,0.9730519480519481,0.981335952848723,2997,31,83,0.9662317728319263,0.8903818953323904,0.9267574530732425,2518,88,310,0.9977708518235671,0.9982447445587082,0.9980077419354838,48341,108,85,0.8525073746312685,0.9802148106274732,0.911911648698396,1734,300,35,0.9395715515592311,0.989722035813937,0.8849438986122266
6
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_News_Commentary,IT,1.0,1.0,1.0,1,0,0,1.0,1.0,1.0,27,0,0,1.0,0.5333333333333333,0.6956521739130436,8,0,7,1.0,1.0,1.0,373,0,0,0.5333333333333333,1.0,0.6956521739130436,8,7,0,0.8782608695652174,0.9834905660377359,0.8133333333333332
.ipynb_checkpoints/XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_test_XX-checkpoint.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Model,Dataset,lang,P-INV_CAM,R-INV_CAM,F1-INV_CAM,TP-INV_CAM,FN-INV_CAM,FP-INV_CAM,P-LEETSPEAK,R-LEETSPEAK,F1-LEETSPEAK,TP-LEETSPEAK,FN-LEETSPEAK,FP-LEETSPEAK,P-MIX,R-MIX,F1-MIX,TP-MIX,FN-MIX,FP-MIX,P-O,R-O,F1-O,TP-O,FN-O,FP-O,P-PUNCT_CAM,R-PUNCT_CAM,F1-PUNCT_CAM,TP-PUNCT_CAM,FN-PUNCT_CAM,FP-PUNCT_CAM,F1-Macro,F1-Micro,F1-Weighted
2
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,overall,XX,0.9481957842086459,0.8864395457581831,0.9162782668738132,5308,290,680,0.9867801991629384,0.9217557766453666,0.9531602866144366,34187,458,2902,0.9014184856532159,0.8853898272735948,0.8933322634999599,27834,3044,3603,0.9979634172442747,0.9988415703231079,0.9984023006870109,606153,1237,703,0.8607574405968696,0.9736274824565426,0.9137200043043149,21228,3434,575,0.9349786243959072,0.9879645549530486,0.8794994735051885
3
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,WikiMatrix,XX,0.9451388888888889,0.9013245033112582,0.9227118644067797,1361,79,149,0.9872747747747748,0.9192618223760092,0.9520551664223272,8767,113,770,0.8947044334975369,0.8599668560606061,0.8769917914051184,7265,855,1183,0.9982408940397351,0.9984475263920514,0.9983441995239574,125411,221,195,0.8252884031572556,0.9780536067638065,0.8952004610191817,5437,1151,122,0.9290606965554729,0.983943979822116,0.8664131850952455
4
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,TED2020,XX,0.9566343042071197,0.9140383426097711,0.9348513598987982,1478,67,139,0.9890772911682097,0.9218537909637387,0.9542831334370452,9508,105,806,0.8910379625180201,0.9067237163814181,0.8988124091129422,7417,907,763,0.9988151360072605,0.999111088695696,0.9989630904317516,158480,188,141,0.890465293668955,0.9772308722584966,0.9318326947637293,5837,718,136,0.9437485375288533,0.989253133374841,0.8933181527544282
5
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_ParaCrawl,XX,0.9421306444541867,0.8613226452905811,0.8999162479061975,2149,132,346,0.9852213265450431,0.9196092667150683,0.9512852900010242,13933,209,1218,0.9083831393519974,0.8858128834355828,0.8969560490759436,11551,1165,1489,0.997423332724026,0.9988559077404002,0.9981391062081388,283743,733,325,0.8603268945022289,0.969740955783832,0.9117631620387382,8685,1410,271,0.9316119710460085,0.9887275647956504,0.8759653170414495
6
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_News_Commentary,XX,0.963855421686747,0.8743169398907104,0.9169054441260744,320,12,46,0.9853095487932844,0.9475277497477296,0.9660493827160493,1878,28,104,0.9259687680740313,0.9086265607264472,0.9172156975078775,1601,128,161,0.9980380720080598,0.9988060176709385,0.9984218971713502,37644,74,45,0.8914728682170543,0.9693486590038314,0.9287812041116006,1265,154,40,0.9454747251265904,0.9908129175946548,0.9018762727299015
README.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - spacy
4
+ - token-classification
5
+ language:
6
+ - multilingual
7
+ model-index:
8
+ - name: xx_LeetSpeakNER_mstsb_mpnet
9
+ results:
10
+ - task:
11
+ name: NER
12
+ type: token-classification
13
+ metrics:
14
+ - name: NER Precision
15
+ type: precision
16
+ value: 0.912373549
17
+ - name: NER Recall
18
+ type: recall
19
+ value: 0.9160452962
20
+ - name: NER F Score
21
+ type: f_score
22
+ value: 0.9142057358
23
+ ---
24
+ | Feature | Description |
25
+ | --- | --- |
26
+ | **Name** | `xx_LeetSpeakNER_mstsb_mpnet` |
27
+ | **Version** | `0.0.0` |
28
+ | **spaCy** | `>=3.4.3,<3.5.0` |
29
+ | **Default Pipeline** | `transformer`, `ner` |
30
+ | **Components** | `transformer`, `ner` |
31
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
32
+ | **Sources** | n/a |
33
+ | **License** | n/a |
34
+ | **Author** | n/a |
35
+
36
+ ### Label Scheme
37
+
38
+ <details>
39
+
40
+ <summary>View label scheme (4 labels for 1 component)</summary>
41
+
42
+ | Component | Labels |
43
+ | --- | --- |
44
+ | **`ner`** | `INV_CAMO`, `LEETSPEAK`, `MIX`, `PUNCT_CAMO` |
45
+
46
+ </details>
47
+
48
+ ### Accuracy
49
+
50
+ | Type | Score |
51
+ | --- | --- |
52
+ | `ENTS_F` | 91.42 |
53
+ | `ENTS_P` | 91.24 |
54
+ | `ENTS_R` | 91.60 |
55
+ | `TRANSFORMER_LOSS` | 3969.11 |
56
+ | `NER_LOSS` | 3730.97 |
XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_test_DE.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Model,Dataset,lang,P-INV_CAM,R-INV_CAM,F1-INV_CAM,TP-INV_CAM,FN-INV_CAM,FP-INV_CAM,P-LEETSPEAK,R-LEETSPEAK,F1-LEETSPEAK,TP-LEETSPEAK,FN-LEETSPEAK,FP-LEETSPEAK,P-MIX,R-MIX,F1-MIX,TP-MIX,FN-MIX,FP-MIX,P-O,R-O,F1-O,TP-O,FN-O,FP-O,P-PUNCT_CAM,R-PUNCT_CAM,F1-PUNCT_CAM,TP-PUNCT_CAM,FN-PUNCT_CAM,FP-PUNCT_CAM,F1-Macro,F1-Micro,F1-Weighted
2
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,overall,DE,0.9737732656514383,0.9185953711093376,0.9453798767967146,1151,31,102,0.9890710382513661,0.9180050718512257,0.952213941253836,6516,72,582,0.9024707412223667,0.9166253921083044,0.9094929969694489,5552,600,505,0.9982093286267522,0.9992109709345707,0.9987098986348928,116507,209,92,0.9072652096869462,0.978343949044586,0.9414649095923997,4608,471,102,0.9494523246494584,0.9898096774906607,0.9068527193115795
3
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,WikiMatrix,DE,0.9611307420494699,0.9220338983050848,0.9411764705882353,272,11,23,0.9852420306965761,0.9195592286501377,0.951268167569108,1669,25,146,0.8957816377171216,0.878345498783455,0.886977886977887,1444,168,200,0.9985810275030258,0.9987060689540028,0.9986435443143638,23927,34,31,0.8551617873651772,0.977112676056338,0.9120788824979458,1110,188,26,0.9380289903895079,0.9852329450915142,0.8827126057820743
4
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,TED2020,DE,0.9760479041916168,0.9449275362318841,0.9602356406480118,326,8,19,0.9887278582930756,0.9141439205955335,0.9499742135121196,1842,21,173,0.8869512928442573,0.938295165394402,0.9119010819165378,1475,188,97,0.999345721015441,0.9992803402028132,0.9993130295397298,30548,20,22,0.9391941391941392,0.9930286599535244,0.9653614457831327,1282,83,9,0.9573570822799062,0.9910597044114771,0.9181269619610057
5
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_ParaCrawl,DE,0.978021978021978,0.898989898989899,0.9368421052631579,534,12,60,0.9911444141689373,0.919431279620853,0.953941976725127,2910,26,255,0.915077365958978,0.9260742898761836,0.9205429864253393,2543,236,203,0.997446949602122,0.999352213271323,0.9983986724745904,60166,154,39,0.9160599571734476,0.9696282864913871,0.9420832415767453,2139,196,67,0.9503617964929921,0.9909454988681874,0.9125937852210895
6
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_News_Commentary,DE,1.0,1.0,1.0,19,0,0,1.0,0.9223300970873787,0.9595959595959597,95,0,8,0.9183673469387755,0.9473684210526315,0.9326424870466321,90,8,5,0.9994643813604713,1.0,0.999732118939191,1866,1,0,0.9506172839506173,1.0,0.9746835443037974,77,4,0,0.9733308219771161,0.9939814814814815,0.9483704066893089
XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_test_EN.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Model,Dataset,lang,P-INV_CAM,R-INV_CAM,F1-INV_CAM,TP-INV_CAM,FN-INV_CAM,FP-INV_CAM,P-LEETSPEAK,R-LEETSPEAK,F1-LEETSPEAK,TP-LEETSPEAK,FN-LEETSPEAK,FP-LEETSPEAK,P-MIX,R-MIX,F1-MIX,TP-MIX,FN-MIX,FP-MIX,P-O,R-O,F1-O,TP-O,FN-O,FP-O,P-PUNCT_CAM,R-PUNCT_CAM,F1-PUNCT_CAM,TP-PUNCT_CAM,FN-PUNCT_CAM,FP-PUNCT_CAM,F1-Macro,F1-Micro,F1-Weighted
2
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,overall,EN,0.9530102790014684,0.8562005277044855,0.9020152883947186,649,32,109,0.9817336965229931,0.8504997223764575,0.9114168835998512,6127,114,1077,0.808325449385052,0.8168260038240918,0.8125534950071327,4272,1013,958,0.997175853109603,0.9987901786879967,0.9979823630702868,114754,325,139,0.768174885612608,0.9639553429027113,0.8550007073136229,3022,912,113,0.8957937474771225,0.9817405883249505,0.8126320323379833
3
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,WikiMatrix,EN,0.95260663507109,0.8701298701298701,0.9095022624434389,201,10,30,0.9880174291938998,0.8528443817583451,0.9154680797375726,1814,22,313,0.7996289424860853,0.8363518758085382,0.8175782484982611,1293,324,253,0.9975686002084057,0.998840714120102,0.9982042518681573,25848,63,30,0.8021001615508885,0.9631425800193987,0.875275451740855,993,245,38,0.9032056588576569,0.9784506539447636,0.8244982676429068
4
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,TED2020,EN,0.9701492537313433,0.9605911330049262,0.9653465346534655,195,6,8,0.9887410440122825,0.8625,0.921316165951359,1932,22,308,0.799493991144845,0.8592794017675051,0.8283093053735255,1264,317,207,0.9994077934383513,0.999319022945966,0.9993634062208536,33752,20,23,0.8263157894736842,0.9822732012513035,0.8975702715578847,942,198,17,0.9223811367514176,0.985432622645415,0.8500299409269025
5
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_ParaCrawl,EN,0.937984496124031,0.7781350482315113,0.8506151142355008,242,16,69,0.9711986446421008,0.8386978785662034,0.9000981354268891,2293,68,441,0.8228287841191066,0.7718808193668529,0.7965409560413164,1658,357,490,0.9955136081015674,0.9983944140544779,0.9969519299782814,53477,241,86,0.6934306569343066,0.9474161378059837,0.8007662835249042,1045,462,58,0.8689944838413783,0.9808884211229724,0.7750419615611425
6
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_News_Commentary,EN,1.0,0.8461538461538461,0.9166666666666666,11,0,2,0.9777777777777777,0.8543689320388349,0.9119170984455958,88,2,15,0.7916666666666666,0.8769230769230769,0.832116788321168,57,15,8,0.9994040524433849,1.0,0.9997019374068554,1677,1,0,0.8571428571428571,1.0,0.923076923076923,42,7,0,0.9166958827834419,0.9868421052631579,0.8514786046957316
XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_test_ES.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Model,Dataset,lang,P-INV_CAM,R-INV_CAM,F1-INV_CAM,TP-INV_CAM,FN-INV_CAM,FP-INV_CAM,P-LEETSPEAK,R-LEETSPEAK,F1-LEETSPEAK,TP-LEETSPEAK,FN-LEETSPEAK,FP-LEETSPEAK,P-MIX,R-MIX,F1-MIX,TP-MIX,FN-MIX,FP-MIX,P-O,R-O,F1-O,TP-O,FN-O,FP-O,P-PUNCT_CAM,R-PUNCT_CAM,F1-PUNCT_CAM,TP-PUNCT_CAM,FN-PUNCT_CAM,FP-PUNCT_CAM,F1-Macro,F1-Micro,F1-Weighted
2
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,overall,ES,0.9539347408829175,0.8603577611079054,0.9047330097087378,1491,72,242,0.9849003185089065,0.9667670217693377,0.9757494302576988,8349,128,287,0.9555640437993379,0.8962264150943396,0.9249445403007148,7505,349,869,0.9977464475249177,0.9987676326013396,0.9982567789031973,143449,324,177,0.867626126838526,0.9759829211884007,0.9186202277294039,5486,837,135,0.9444607973799506,0.989820822668016,0.9001883968278017
3
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,WikiMatrix,ES,0.940677966101695,0.8649350649350649,0.9012178619756427,333,21,52,0.9882604055496265,0.963579604578564,0.9757639620653319,1852,22,70,0.9522700814901047,0.8633245382585752,0.9056185995017991,1636,82,259,0.997602523659306,0.9982743381455449,0.9979383178356546,23718,57,41,0.8191340782122905,0.9840604026845637,0.8940548780487804,1173,259,19,0.9349187238854417,0.9848729118787088,0.880470954913311
4
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,TED2020,ES,0.9666666666666667,0.8726851851851852,0.9172749391727495,377,13,55,0.9871977240398293,0.9701770736253494,0.9786133960047003,2082,27,64,0.9634397528321318,0.9140205178309722,0.9380797192278767,1871,71,176,0.99800230982926,0.9990313710786152,0.9985165753189363,31973,64,31,0.8835616438356164,0.985485103132162,0.9317443120260023,1290,170,19,0.9528457883500531,0.9909062154040803,0.9152685404973484
5
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_ParaCrawl,ES,0.9473684210526315,0.8456375838926175,0.8936170212765957,504,28,92,0.9811584089323099,0.9663230240549828,0.9736842105263158,2812,54,98,0.9562178072111847,0.8983753888696855,0.9263945820709321,2599,119,294,0.9974167160002153,0.9987425676768039,0.9980792015222777,55599,144,70,0.8758992805755396,0.9667493796526054,0.9190846897853268,1948,276,67,0.9421719410362895,0.9903094424418333,0.8976568982433386
6
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_News_Commentary,ES,0.9651567944250871,0.865625,0.9126853377265238,277,10,43,0.9846437346437347,0.9668275030156815,0.9756542909312234,1603,25,55,0.9428763440860215,0.9116309291747888,0.9269904195573174,1403,85,136,0.9982922436813016,0.9986643473939243,0.9984782608695651,32151,55,43,0.8906379453189727,0.9728506787330317,0.9299307958477508,1075,132,30,0.9487478209864761,0.99166123424598,0.908119686806149
XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_test_FR.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Model,Dataset,lang,P-INV_CAM,R-INV_CAM,F1-INV_CAM,TP-INV_CAM,FN-INV_CAM,FP-INV_CAM,P-LEETSPEAK,R-LEETSPEAK,F1-LEETSPEAK,TP-LEETSPEAK,FN-LEETSPEAK,FP-LEETSPEAK,P-MIX,R-MIX,F1-MIX,TP-MIX,FN-MIX,FP-MIX,P-O,R-O,F1-O,TP-O,FN-O,FP-O,P-PUNCT_CAM,R-PUNCT_CAM,F1-PUNCT_CAM,TP-PUNCT_CAM,FN-PUNCT_CAM,FP-PUNCT_CAM,F1-Macro,F1-Micro,F1-Weighted
2
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,overall,FR,0.9527991218441273,0.8902564102564102,0.9204665959703074,868,43,107,0.9879536679536679,0.8899554813578185,0.9363975700797774,6397,78,791,0.8528321863419799,0.8978264908043841,0.8747511312217194,4833,834,550,0.9985265148530454,0.9991405954333291,0.998833460759354,126723,187,109,0.8865153538050734,0.9767099779357685,0.929429604572495,3984,510,95,0.9319756725207305,0.9885640709692157,0.8738936814641317
3
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,WikiMatrix,FR,0.96,0.9037656903765691,0.9310344827586206,216,9,23,0.9867708959711365,0.8908794788273615,0.936376604850214,1641,22,201,0.8505291005291006,0.8443860801050558,0.8474464579901153,1286,226,237,0.9987760097919217,0.9985468451242829,0.9986614143113933,26112,32,38,0.8079800498753117,0.9788519637462235,0.8852459016393442,972,231,21,0.9197529723099376,0.9830877809217159,0.8502520497442401
4
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,TED2020,FR,0.9533898305084746,0.8928571428571429,0.9221311475409837,225,11,27,0.9873052510098096,0.8856107660455487,0.9336971350613915,1711,22,221,0.8480589022757697,0.9181159420289855,0.8816979819067502,1267,227,113,0.9987332385836989,0.9993816664090277,0.999057347282533,32325,41,20,0.9201954397394136,0.9843205574912892,0.9511784511784511,1130,98,18,0.9375524125940219,0.9892328035189033,0.8843576437138687
5
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_ParaCrawl,FR,0.9518348623853211,0.881104033970276,0.9151047409040793,415,21,56,0.9898339545916638,0.8954629061925199,0.9402864960566553,2921,30,341,0.8595073133179368,0.9208247422680412,0.8891100935695798,2233,365,192,0.9987018084110739,0.9993202930292274,0.9990109549946774,66160,86,45,0.9117794486215539,0.97638217928073,0.9429756350440643,1819,176,44,0.9372975841138113,0.9908657343787891,0.8846905151390063
6
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_News_Commentary,FR,0.8571428571428571,0.9230769230769231,0.888888888888889,12,2,1,0.9848484848484849,0.7142857142857143,0.8280254777070063,65,1,26,0.6825396825396826,0.8958333333333334,0.7747747747747749,43,20,5,0.9893350062735258,0.9987333755541482,0.9940119760479043,1577,17,2,0.9402985074626866,0.863013698630137,0.9,63,4,10,0.8771402234837149,0.975609756097561,0.787175685826142
XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_test_IT.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Model,Dataset,lang,P-INV_CAM,R-INV_CAM,F1-INV_CAM,TP-INV_CAM,FN-INV_CAM,FP-INV_CAM,P-LEETSPEAK,R-LEETSPEAK,F1-LEETSPEAK,TP-LEETSPEAK,FN-LEETSPEAK,FP-LEETSPEAK,P-MIX,R-MIX,F1-MIX,TP-MIX,FN-MIX,FP-MIX,P-O,R-O,F1-O,TP-O,FN-O,FP-O,P-PUNCT_CAM,R-PUNCT_CAM,F1-PUNCT_CAM,TP-PUNCT_CAM,FN-PUNCT_CAM,FP-PUNCT_CAM,F1-Macro,F1-Micro,F1-Weighted
2
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,overall,IT,0.9111816019032514,0.9054373522458629,0.908300395256917,1149,112,120,0.9903846153846154,0.976303317535545,0.9832935560859187,6798,66,165,0.9581081081081081,0.8872203973095574,0.921302688215707,5672,248,721,0.9981698947689492,0.9982269841572455,0.9981984386468272,104720,192,186,0.8543046357615894,0.9694692343823391,0.9082508250825083,4128,704,130,0.9438691806575757,0.9893205373660018,0.8912935031760101
3
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,WikiMatrix,IT,0.9237057220708447,0.9416666666666667,0.9325997248968363,339,28,21,0.987865416436845,0.9781540141998908,0.9829857299670692,1791,22,40,0.9668874172185431,0.8728260869565218,0.9174521565267066,1606,55,234,0.9986455632521961,0.9978732454274777,0.9982592549611233,25806,35,55,0.8390966831333804,0.9850869925434963,0.90625,1189,228,18,0.947509373270347,0.9881668220843114,0.8953833601703967
4
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,TED2020,IT,0.9244791666666666,0.922077922077922,0.9232769830949284,355,29,30,0.9933469805527124,0.9798081776880363,0.986531130876747,1941,13,40,0.9367396593673966,0.9005847953216374,0.9183064997018485,1540,104,170,0.998563074352548,0.9984963410966685,0.998529706609637,29882,43,45,0.8759177679882526,0.9423380726698263,0.9079147640791476,1193,169,73,0.9469118168724618,0.9898494428534974,0.8972171482155227
5
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_ParaCrawl,IT,0.8919449901768173,0.8680688336520076,0.87984496124031,454,55,69,0.9897622192866579,0.9730519480519481,0.981335952848723,2997,31,83,0.9662317728319263,0.8903818953323904,0.9267574530732425,2518,88,310,0.9977708518235671,0.9982447445587082,0.9980077419354838,48341,108,85,0.8525073746312685,0.9802148106274732,0.911911648698396,1734,300,35,0.9395715515592311,0.989722035813937,0.8849438986122266
6
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_News_Commentary,IT,1.0,1.0,1.0,1,0,0,1.0,1.0,1.0,27,0,0,1.0,0.5333333333333333,0.6956521739130436,8,0,7,1.0,1.0,1.0,373,0,0,0.5333333333333333,1.0,0.6956521739130436,8,7,0,0.8782608695652174,0.9834905660377359,0.8133333333333332
XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2_test_XX.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Model,Dataset,lang,P-INV_CAM,R-INV_CAM,F1-INV_CAM,TP-INV_CAM,FN-INV_CAM,FP-INV_CAM,P-LEETSPEAK,R-LEETSPEAK,F1-LEETSPEAK,TP-LEETSPEAK,FN-LEETSPEAK,FP-LEETSPEAK,P-MIX,R-MIX,F1-MIX,TP-MIX,FN-MIX,FP-MIX,P-O,R-O,F1-O,TP-O,FN-O,FP-O,P-PUNCT_CAM,R-PUNCT_CAM,F1-PUNCT_CAM,TP-PUNCT_CAM,FN-PUNCT_CAM,FP-PUNCT_CAM,F1-Macro,F1-Micro,F1-Weighted
2
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,overall,XX,0.9481957842086459,0.8864395457581831,0.9162782668738132,5308,290,680,0.9867801991629384,0.9217557766453666,0.9531602866144366,34187,458,2902,0.9014184856532159,0.8853898272735948,0.8933322634999599,27834,3044,3603,0.9979634172442747,0.9988415703231079,0.9984023006870109,606153,1237,703,0.8607574405968696,0.9736274824565426,0.9137200043043149,21228,3434,575,0.9349786243959072,0.9879645549530486,0.8794994735051885
3
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,WikiMatrix,XX,0.9451388888888889,0.9013245033112582,0.9227118644067797,1361,79,149,0.9872747747747748,0.9192618223760092,0.9520551664223272,8767,113,770,0.8947044334975369,0.8599668560606061,0.8769917914051184,7265,855,1183,0.9982408940397351,0.9984475263920514,0.9983441995239574,125411,221,195,0.8252884031572556,0.9780536067638065,0.8952004610191817,5437,1151,122,0.9290606965554729,0.983943979822116,0.8664131850952455
4
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,TED2020,XX,0.9566343042071197,0.9140383426097711,0.9348513598987982,1478,67,139,0.9890772911682097,0.9218537909637387,0.9542831334370452,9508,105,806,0.8910379625180201,0.9067237163814181,0.8988124091129422,7417,907,763,0.9988151360072605,0.999111088695696,0.9989630904317516,158480,188,141,0.890465293668955,0.9772308722584966,0.9318326947637293,5837,718,136,0.9437485375288533,0.989253133374841,0.8933181527544282
5
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_ParaCrawl,XX,0.9421306444541867,0.8613226452905811,0.8999162479061975,2149,132,346,0.9852213265450431,0.9196092667150683,0.9512852900010242,13933,209,1218,0.9083831393519974,0.8858128834355828,0.8969560490759436,11551,1165,1489,0.997423332724026,0.9988559077404002,0.9981391062081388,283743,733,325,0.8603268945022289,0.969740955783832,0.9117631620387382,8685,1410,271,0.9316119710460085,0.9887275647956504,0.8759653170414495
6
+ XX-LeetSpeakNER-mstsb-paraphrase-multilingual-mpnet-base-v2,OPUS_News_Commentary,XX,0.963855421686747,0.8743169398907104,0.9169054441260744,320,12,46,0.9853095487932844,0.9475277497477296,0.9660493827160493,1878,28,104,0.9259687680740313,0.9086265607264472,0.9172156975078775,1601,128,161,0.9980380720080598,0.9988060176709385,0.9984218971713502,37644,74,45,0.8914728682170543,0.9693486590038314,0.9287812041116006,1265,154,40,0.9454747251265904,0.9908129175946548,0.9018762727299015
config.cfg ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [paths]
2
+ train = "./Data/XX/11-18-22_17-51/NER_TRAIN_DATA.spacy"
3
+ dev = "./Data/XX/11-18-22_17-51/NER_DEV_DATA.spacy"
4
+ vectors = null
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ gpu_allocator = "pytorch"
9
+ seed = 0
10
+
11
+ [nlp]
12
+ lang = "xx"
13
+ pipeline = ["transformer","ner"]
14
+ batch_size = 128
15
+ disabled = []
16
+ before_creation = null
17
+ after_creation = null
18
+ after_pipeline_creation = null
19
+ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+
21
+ [components]
22
+
23
+ [components.ner]
24
+ factory = "ner"
25
+ incorrect_spans_key = null
26
+ moves = null
27
+ scorer = {"@scorers":"spacy.ner_scorer.v1"}
28
+ update_with_oracle_cut_size = 100
29
+
30
+ [components.ner.model]
31
+ @architectures = "spacy.TransitionBasedParser.v2"
32
+ state_type = "ner"
33
+ extra_state_tokens = false
34
+ hidden_width = 64
35
+ maxout_pieces = 2
36
+ use_upper = false
37
+ nO = null
38
+
39
+ [components.ner.model.tok2vec]
40
+ @architectures = "spacy-transformers.TransformerListener.v1"
41
+ grad_factor = 1.0
42
+ pooling = {"@layers":"reduce_mean.v1"}
43
+ upstream = "*"
44
+
45
+ [components.transformer]
46
+ factory = "transformer"
47
+ max_batch_items = 4096
48
+ set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
49
+
50
+ [components.transformer.model]
51
+ @architectures = "spacy-transformers.TransformerModel.v3"
52
+ name = "AIDA-UPM/mstsb-paraphrase-multilingual-mpnet-base-v2"
53
+ mixed_precision = false
54
+
55
+ [components.transformer.model.get_spans]
56
+ @span_getters = "spacy-transformers.strided_spans.v1"
57
+ window = 128
58
+ stride = 96
59
+
60
+ [components.transformer.model.grad_scaler_config]
61
+
62
+ [components.transformer.model.tokenizer_config]
63
+ use_fast = true
64
+
65
+ [components.transformer.model.transformer_config]
66
+
67
+ [corpora]
68
+
69
+ [corpora.dev]
70
+ @readers = "spacy.Corpus.v1"
71
+ path = ${paths.dev}
72
+ max_length = 0
73
+ gold_preproc = false
74
+ limit = 0
75
+ augmenter = null
76
+
77
+ [corpora.train]
78
+ @readers = "spacy.Corpus.v1"
79
+ path = ${paths.train}
80
+ max_length = 0
81
+ gold_preproc = false
82
+ limit = 0
83
+ augmenter = null
84
+
85
+ [training]
86
+ accumulate_gradient = 3
87
+ dev_corpus = "corpora.dev"
88
+ train_corpus = "corpora.train"
89
+ seed = ${system.seed}
90
+ gpu_allocator = ${system.gpu_allocator}
91
+ dropout = 0.1
92
+ patience = 1600
93
+ max_epochs = 0
94
+ max_steps = 20000
95
+ eval_frequency = 200
96
+ frozen_components = []
97
+ annotating_components = []
98
+ before_to_disk = null
99
+
100
+ [training.batcher]
101
+ @batchers = "spacy.batch_by_padded.v1"
102
+ discard_oversize = true
103
+ size = 2000
104
+ buffer = 256
105
+ get_length = null
106
+
107
+ [training.logger]
108
+ @loggers = "spacy.WandbLogger.v3"
109
+ project_name = "ASOC-LeetSpeakNER-full-XX-MultiNER"
110
+ remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
111
+ model_log_interval = null
112
+ log_dataset_dir = null
113
+ entity = null
114
+ run_name = null
115
+
116
+ [training.optimizer]
117
+ @optimizers = "Adam.v1"
118
+ beta1 = 0.9
119
+ beta2 = 0.999
120
+ L2_is_weight_decay = true
121
+ L2 = 0.01
122
+ grad_clip = 1.0
123
+ use_averages = false
124
+ eps = 0.00000001
125
+
126
+ [training.optimizer.learn_rate]
127
+ @schedules = "warmup_linear.v1"
128
+ warmup_steps = 250
129
+ total_steps = 20000
130
+ initial_rate = 0.00005
131
+
132
+ [training.score_weights]
133
+ ents_f = 1.0
134
+ ents_p = 0.0
135
+ ents_r = 0.0
136
+ ents_per_type = null
137
+
138
+ [pretraining]
139
+
140
+ [initialize]
141
+ vectors = ${paths.vectors}
142
+ init_tok2vec = ${paths.init_tok2vec}
143
+ vocab_data = null
144
+ lookups = null
145
+ before_init = null
146
+ after_init = null
147
+
148
+ [initialize.components]
149
+
150
+ [initialize.tokenizer]
meta.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lang":"xx",
3
+ "name":"LeetSpeakNER_mstsb_mpnet",
4
+ "version":"0.0.0",
5
+ "description":"",
6
+ "author":"",
7
+ "email":"",
8
+ "url":"",
9
+ "license":"",
10
+ "spacy_version":">=3.4.3,<3.5.0",
11
+ "spacy_git_version":"Unknown",
12
+ "vectors":{
13
+ "width":0,
14
+ "vectors":0,
15
+ "keys":0,
16
+ "name":null
17
+ },
18
+ "labels":{
19
+ "transformer":[
20
+
21
+ ],
22
+ "ner":[
23
+ "INV_CAMO",
24
+ "LEETSPEAK",
25
+ "MIX",
26
+ "PUNCT_CAMO"
27
+ ]
28
+ },
29
+ "pipeline":[
30
+ "transformer",
31
+ "ner"
32
+ ],
33
+ "components":[
34
+ "transformer",
35
+ "ner"
36
+ ],
37
+ "disabled":[
38
+
39
+ ],
40
+ "performance":{
41
+ "ents_f":0.9142057358,
42
+ "ents_p":0.912373549,
43
+ "ents_r":0.9160452962,
44
+ "ents_per_type":{
45
+ "LEETSPEAK":{
46
+ "p":0.9046705821,
47
+ "r":0.9793261537,
48
+ "f":0.9405192108
49
+ },
50
+ "MIX":{
51
+ "p":0.8851584562,
52
+ "r":0.8557820653,
53
+ "f":0.8702224142
54
+ },
55
+ "PUNCT_CAMO":{
56
+ "p":0.9769480235,
57
+ "r":0.8685420401,
58
+ "f":0.9195610923
59
+ },
60
+ "INV_CAMO":{
61
+ "p":0.8914035755,
62
+ "r":0.9441982272,
63
+ "f":0.9170416748
64
+ }
65
+ },
66
+ "transformer_loss":3969.1058882917,
67
+ "ner_loss":3730.9706013042
68
+ },
69
+ "requirements":[
70
+ "spacy-transformers>=1.1.8,<1.2.0"
71
+ ]
72
+ }
ner/cfg ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "moves":null,
3
+ "update_with_oracle_cut_size":100,
4
+ "multitasks":[
5
+
6
+ ],
7
+ "min_action_freq":1,
8
+ "learn_tokens":false,
9
+ "beam_width":1,
10
+ "beam_density":0.0,
11
+ "beam_update_prob":0.0,
12
+ "incorrect_spans_key":null
13
+ }
ner/model ADDED
Binary file (226 kB). View file
 
ner/moves ADDED
@@ -0,0 +1 @@
 
 
1
+ ��moves�D{"0":{},"1":{"LEETSPEAK":278858,"MIX":249011,"PUNCT_CAMO":200301,"INV_CAMO":44687},"2":{"LEETSPEAK":278858,"MIX":249011,"PUNCT_CAMO":200301,"INV_CAMO":44687},"3":{"LEETSPEAK":278858,"MIX":249011,"PUNCT_CAMO":200301,"INV_CAMO":44687},"4":{"LEETSPEAK":278858,"MIX":249011,"PUNCT_CAMO":200301,"INV_CAMO":44687,"":1},"5":{"":1}}�cfg��neg_key�
tokenizer ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ��prefix_search� ~^§|^%|^=|^—|^–|^\+(?![0-9])|^…|^……|^,|^:|^;|^\!|^\?|^¿|^؟|^¡|^\(|^\)|^\[|^\]|^\{|^\}|^<|^>|^_|^#|^\*|^&|^。|^?|^!|^,|^、|^;|^:|^~|^·|^।|^،|^۔|^؛|^٪|^\.\.+|^…|^\'|^"|^”|^“|^`|^‘|^´|^’|^‚|^,|^„|^»|^«|^「|^」|^『|^』|^(|^)|^〔|^〕|^【|^】|^《|^》|^〈|^〉|^\$|^£|^€|^¥|^฿|^US\$|^C\$|^A\$|^₽|^﷼|^₴|^₠|^₡|^₢|^₣|^₤|^₥|^₦|^₧|^₨|^₩|^₪|^₫|^€|^₭|^₮|^₯|^₰|^₱|^₲|^₳|^₴|^₵|^₶|^₷|^₸|^₹|^₺|^₻|^₼|^₽|^₾|^₿|^[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U0001D800-\U
0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]�suffix_search�2y…$|……$|,$|:$|;$|\!$|\?$|¿$|؟$|¡$|\($|\)$|\[$|\]$|\{$|\}$|<$|>$|_$|#$|\*$|&$|。$|?$|!$|,$|、$|;$|:$|~$|·$|।$|،$|۔$|؛$|٪$|\.\.+$|…$|\'$|"$|”$|“$|`$|‘$|´$|’$|‚$|,$|„$|»$|«$|「$|」$|『$|』$|($|)$|〔$|〕$|【$|】$|《$|》$|〈$|〉$|[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u3190
\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U0001D800-\U0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]$|'s$|'S$|’s$|’S$|—$|–$|(?<=[0-9])\+$|(?<=°[FfCcKk])\.$|(?<=[0-9])(?:\$|£|€|¥|฿|US\$|C\$|A\$|₽|﷼|₴|₠|₡|₢|₣|₤|₥|₦|₧|₨|₩|₪|₫|€|₭|₮|₯|₰|₱|₲|₳|₴|₵|₶|₷|₸|₹|₺|₻|₼|₽|₾|₿)$|(?<=[0-9])(?:km|km²|km³|m|m²|m³|dm|dm²|dm³|cm|cm²|cm³|mm|mm²|mm³|ha|µm|nm|yd|in|ft|kg|g|mg|µg|t|lb|oz|m/s|km/h|kmh|mph|hPa|Pa|mbar|mb|MB|kb|KB|gb|GB|tb|TB|T|G|M|K|%|км|км²|км³|м|м²|м³|дм|дм²|дм³|см|см²|см³|мм|мм²|мм³|нм|кг|г|мг|м/с|км/ч|кПа|Па|мбар|Кб|КБ|кб|Мб|МБ|мб|Гб|ГБ|гб|Тб|ТБ|тбكم|كم²|كم³|م|م²|م³|سم|سم²|سم³|مم|مم²|مم³|كم|غرام|جرام|جم|كغ|ملغ|كوب|اكواب)$|(?<=[0-9a-z\uFF41-\uFF5A\u00DF-\u00F
6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E\u017F\u0180\u0183\u0185\u0188\u018C\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F\u0240\u0242\u0247\u0249\u024B\u024D\u024F\u2C61\u2C65\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73\u2C74\u2C76-\u2C7B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1E
AD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFFёа-яәөүҗңһα-ωάέίόώήύа-щюяіїєґѓѕјљњќѐѝ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F%²\-\+…|……|,|:|;|\!|\?|¿|؟|¡|\(|\)|\[|\]|\{|\}|<|>|_|#|\*|&|。|?|!|,|、|;|:|~|·|।|،|۔|؛|٪(?:\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉)])\.$|(?<=[A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A
\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002
B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F][A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u
1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])\.$�infix_finditer�>�\.\.+|…|[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u260
0-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U0001D800-\U0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]|(?<=[0-9])[+\-\*^](?=[0-9-])|(?<=[a-z\uFF41-\uFF5A\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148\u0149\u014B\u014D\
u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E\u017F\u0180\u0183\u0185\u0188\u018C\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F\u0240\u0242\u0247\u0249\u024B\u024D\u024F\u2C61\u2C65\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73\u2C74\u2C76-\u2C7B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFFёа-яәөү
җңһα-ωάέίόώήύа-щюяіїєґѓѕјљњќѐѝ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉])\.(?=[A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA7
40\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF
\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F]),(?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U
0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])(?:-|–|—|--|---|——|~)(?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\
u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F0-9])[:<>=/](?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1
200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])�token_match��url_match�
2
+ ��A�
3
+ � ��A� �'��A�'�''��A�''�(*_*)��A�(*_*)�(-8��A�(-8�(-:��A�(-:�(-;��A�(-;�(-_-)��A�(-_-)�(._.)��A�(._.)�(:��A�(:�(;��A�(;�(=��A�(=�(>_<)��A�(>_<)�(^_^)��A�(^_^)�(o:��A�(o:�(¬_¬)��A�(¬_¬)�(ಠ_ಠ)��A�(ಠ_ಠ)�(╯°□°)╯︵┻━┻��A�(╯°□°)╯︵┻━┻�)-:��A�)-:�):��A�):�-_-��A�-_-�-__-��A�-__-�._.��A�._.�0.0��A�0.0�0.o��A�0.o�0_0��A�0_0�0_o��A�0_o�8)��A�8)�8-)��A�8-)�8-D��A�8-D�8D��A�8D�:'(��A�:'(�:')��A�:')�:'-(��A�:'-(�:'-)��A�:'-)�:(��A�:(�:((��A�:((�:(((��A�:(((�:()��A�:()�:)��A�:)�:))��A�:))�:)))��A�:)))�:*��A�:*�:-(��A�:-(�:-((��A�:-((�:-(((��A�:-(((�:-)��A�:-)�:-))��A�:-))�:-)))��A�:-)))�:-*��A�:-*�:-/��A�:-/�:-0��A�:-0�:-3��A�:-3�:->��A�:->�:-D��A�:-D�:-O��A�:-O�:-P��A�:-P�:-X��A�:-X�:-]��A�:-]�:-o��A�:-o�:-p��A�:-p�:-x��A�:-x�:-|��A�:-|�:-}��A�:-}�:/��A�:/�:0��A�:0�:1��A�:1�:3��A�:3�:>��A�:>�:D��A�:D�:O��A�:O�:P��A�:P�:X��A�:X�:]��A�:]�:o��A�:o�:o)��A�:o)�:p��A�:p�:x��A�:x�:|��A�:|�:}��A�:}�;)��A�;)�;-)��A�;-)�;-D��A�;-D�;D��A�;D�;_;��A�;_;�<.<��A�<.<�</3��A�</3�<3��A�<3�<33��A�<33�<333��A�<333�<space>��A�<space>�=(��A�=(�=)��A�=)�=/��A�=/�=3��A�=3�=D��A�=D�=[��A�=[�=]��A�=]�=|��A�=|�>.<��A�>.<�>.>��A�>.>�>:(��A�>:(�>:o��A�>:o�><(((*>��A�><(((*>�@_@��A�@_@�C++��A�C++�O.O��A�O.O�O.o��A�O.o�O_O��A�O_O�O_o��A�O_o�V.V��A�V.V�V_V��A�V_V�XD��A�XD�XDD��A�XDD�[-:��A�[-:�[:��A�[:�[=��A�[=�\")��A�\")�\n��A�\n�\t��A�\t�]=��A�]=�^_^��A�^_^�^__^��A�^__^�^___^��A�^___^�a.��A�a.�b.��A�b.�c.��A�c.�d.��A�d.�e.��A�e.�f.��A�f.�g.��A�g.�h.��A�h.�i.��A�i.�j.��A�j.�k.��A�k.�l.��A�l.�m.��A�m.�n.��A�n.�o.��A�o.�o.0��A�o.0�o.O��A�o.O�o.o��A�o.o�o_0��A�o_0�o_O��A�o_O�o_o��A�o_o�p.��A�p.�q.��A�q.�r.��A�r.�s.��A�s.�t.��A�t.�u.��A�u.�v.��A�v.�v.v��A�v.v�v_v��A�v_v�w.��A�w.�x.��A�x.�xD��A�xD�xDD��A�xDD�y.��A�y.�z.��A�z.� ��A� C� �¯\(ツ)/¯��A�¯\(ツ)/¯�°C.��A�°�A�C�A�.�°F.��A�°�A�F�A�.�°K.��A�°�A�K�A�.�°c.��A�°�A�c�A�.�°f.��A�°�A�f�A�.�°k.��A�°�A�k�A�.�ä.��A�ä.�ö.��A�ö.�ü.��A�ü.�ಠ_ಠ��A�ಠ_ಠ�ಠ︵ಠ��A�ಠ︵ಠ�—��A�—�faster_heuristics�
transformer/cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "max_batch_items":4096
3
+ }
transformer/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8e55ee74495a9b359d536752ea678bc436d213496efc203bd8ab3cd1045ba8a
3
+ size 1134411393
vocab/key2row ADDED
@@ -0,0 +1 @@
 
 
1
+
vocab/lookups.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
3
+ size 1
vocab/strings.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f821bae84c6dc3934ba189b4ccad8870917ae2ede66c14c74413a11bcdcab0b2
3
+ size 16326872
vocab/vectors ADDED
Binary file (128 Bytes). View file
 
vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "mode":"default"
3
+ }
xx_LeetSpeakNER_mstsb_mpnet-any-py3-none-any.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9faccb1d287df26130159ac69f66ed984f388ede66a6819f20265a7d96f54da6
3
+ size 1021315756