to_delete / dreamcoder /domains /regex /groundtruthRegexes.py
Fraser-Greenlee
add dreamcoder codebase
e1c1753
# Dict of ground-truth (gt) regexes.
r"""Ground-truth regex tables.

``gt_dict`` maps an integer id to a regex string, and ``badRegexTasks`` is a
set of ``"Data column no. <id>"`` task names.

This is a raw string on purpose: the snippet below contains backslash
sequences such as ``\d`` and ``\l`` that are invalid escapes in a normal
string literal and make the interpreter emit a warning at compile time.

Leftover snippet kept from the original file (``pre`` is not defined in this
module; presumably the project's regex library -- TODO confirm):

    pre.create(".+"),
    pre.create("\d+"),
    pre.create("\w+"),
    pre.create("\s+"),
    pre.create("\\u+"),
    pre.create("\l+")
"""
# Ground-truth regex string for each entry, keyed by an integer id.
# NOTE(review): the ids presumably correspond to the "Data column no. <id>"
# task names used elsewhere in this file -- confirm against the task loader.
# NOTE(review): `\u` and `\l` inside the patterns are presumably the
# upper-/lower-case letter classes of the project's regex dialect -- confirm.
# NOTE(review): entries 622 and 589 end in doubled quantifiers (`**`, `++`);
# kept verbatim, confirm they are intended.
gt_dict = {
    776: "JPC\\u\\u\\d+\\.png",
    922: "WHS\\d_\\d+",
    354: "\\u+",
    523: "(\\u)+|\\.",
    184: "\\.\\d+",
    501: "u\\d\\d",
    760: "\\u\\u",
    49: "(\\u)+\\u\\d?",
    732: "\\uR5\\d\\d",
    450: "-\\d(\\.(\\d)+)?",
    350: "\\u\\u",
    467: "hu\\d(\\d|\\u)+",
    622: "A(\\d|\\u)**",
    476: "\\u+",
    554: "\\u\\u",
    940: "\\u\\u?",
    496: "\\u\\u",
    369: "\\u\\u\\u",
    596: "\\u+",
    720: "\\(\\d\\d\\d\\) \\d\\d\\d-\\d\\d\\d\\d",
    53: "rec-\\d\\d\\d?-(org)|(dup-0)",
    150: "N\\d\\d",
    741: "#\\d\\d\\d",
    18: "A|C-\\d+-\\d+",
    589: "A(\\u|\\d)++",
    666: "\\(\\d\\d\\d\\) \\d\\d\\d-\\d\\d\\d\\d",
    581: "us13\\u\\d\\d",
    299: "E07000\\d\\d\\d",
    638: "\\l+\\d+\\l+\\d+",
    364: "\\u\\u",
    334: "-00:\\d\\d:\\d\\d.\\d",
    38: "SRX89\\d+",
    247: "'\\d\\d:\\d\\d:00'",
    506: "(S|H)\\d+",
    891: "(r|v)\\d?",
    911: "KW-\\d+",
    792: "\\d*\\u*",
    508: "N000\\d+",
    842: "-?\\d?\\d\\.\\d\\d%",
    200: "\\u\\u",
    694: "\\(\\d+\\)",
    210: "(\\d(\\.\\d)?)|(--)",
    298: "DS_25(\\u|\\d)+",
    668: "\\u+",
    939: "ms0\\d+",
    944: "\\u+\\d?",
    731: "ManH.0\\d\\d",
    229: "\\u+(-\\u+)?",
    28: "Y201\\d/\\d\\d\\d\\d",
    374: "q000\\d(_000\\d)?",
    819: "\\d*\\l*\\d*",
    516: "-122.3\\d+",
    417: "\\u\\uT\\uB",
    660: "ENGL?\\d\\d\\d",
    585: "M?\\u+",
    325: "BUS M \\d\\d\\d.*",
    823: "\\u\\u\\u",
    515: "L|\\u - (\\?\\?)|(\\d?\\d\\.\\d lbs\\.)",
    864: "\\u+",
    359: "MAM\\.OSBS\\.201\\d\\.\\d\\d",
    594: "(\\u|\\d)+( (\\u|\\d)+)*",
    788: "-\\d(,\\d+)?",
    188: "cat\\. \\d\\d",
    355: ".+",
    799: "\\u\\d\\d",
    902: "\\u\\d\\d",
    920: "A\\.\\d\\d",
    330: "Resp\\d\\d",
    396: "\\u+(( |/)\\u+)?",
    393: "US $ \\d\\.\\d\\d",
    680: "Z:-?0\\.\\d\\d",
    744: "t1_cv(\\l|\\d)+",
    461: "(\\u|\\l)+\\d+",
    631: "$\\d+\\.\\d+",
    195: "(OLE)?\\d+",
    693: "\\u",
    577: "EFO_000\\d+",
    392: "$\\d+(,\\d\\d\\d)*\\.00",
    688: "\\u+( \\u+)*",
    816: "\\u\\u\\u",
    489: "UK\\u\\d",
    251: "\\l\\l\\l",
    653: "C\\d+",
    769: "(\\u|\\l|\\d|-)+\\d+",
    991: "Q\\d-201\\d",
    342: "\\u\\u\\d\\d\\d\\d",
    308: "\\u\\u\\u\\u",
    136: "IMPC_\\u\\u\\u_\\d\\d\\d_\\d\\d\\d",
    327: "#\\d+((/|-)\\d+)*",
    981: "\\u\\u\\u",
    892: "(.|\\l)*",
    375: "P\\u\\.\\d\\d\\d\\d\\.\\d\\d\\d",
    499: "A000\\d+",
    474: "\\u+",
    50: "V06\\d+",
    381: "F?\\d+",
    883: "-79.\\d+",
    173: "(\\u|\\l)+\\d+",
    147: "\\u\\u\\u-\\u\\u\\u",
    419: "\\u\\u",
    961: "-?\\d\\.\\d*",
    148: "Q\\d\\d",
    975: "(\\d|\\u)+",
    79: "\\d+(,\\d\\d\\d)+",
    775: "\\u\\l\\l \\d+ \\d\\d\\d\\d",
    774: "FOS\\d\\d+",
    561: ".+",
    509: "S000\\d+",
    494: "S1900\\d+",
    119: "$\\d\\d(,\\d\\d\\d)+",
    29: "(\\u|\\l|\\d)+",
    121: "(\\d|\\u|\\.|/|\\(|\\))+",
    61: "R \\d\\d\\d.\\d\\d",
    871: "-0.7\\d+",
    639: "\\u+?\\d+",
    729: "COMISARIA \\d\\d",
    193: "\\u\\d\\d",
    752: "(.*|\\u\\.?)+",
    17: "$\\d.\\d\\d",
    914: "R\\d\\d\\d\\d",
    510: "P\\d000\\d\\d\\d\\d",
    443: "(W|L) \\d-\\d+",
    20: "MDEL\\d\\d?\\.\\d\\l",
    64: "c04p0100(\\l|\\d)",
    301: "(\\u|\\d)+(-(\\u|\\d)+)*",
    664: "N\\d",
    493: "[0\\.0\\d+]",
    765: "-?\\d\\.\\d+( \\(0\\.\\d+\\))?",
}
# Set of task names of the form "Data column no. <id>".
# NOTE(review): the name suggests these tasks have an unreliable ground-truth
# regex; how the set is consumed is not visible in this file -- confirm.
# Every id listed here also appears as a key of gt_dict.
badRegexTasks = {
    "Data column no. %d" % column_id
    for column_id in (
        922, 184, 467, 476, 150, 299, 334, 493, 891,
        792, 765, 944, 374, 660, 188, 920, 330, 396,
        680, 769, 308, 375, 474, 79, 871, 729, 664,
    )
}