CarisMu committed on
Commit
7136630
1 Parent(s): 12b422d

fix margin typo

Browse files
Files changed (1) hide show
  1. web.py +45 -45
web.py CHANGED
@@ -240,7 +240,7 @@ def web_data():
240
  border: 1px solid #c3e6cb; /* Green border */
241
  border-radius: 5px;
242
  padding: 15px 15px 0px 15px;
243
- marging-bottom: 15px
244
  """,
245
  ),
246
  H3("TxT360 CommonCrawl Filtering vs Other Pretraining Datasets"),
@@ -302,7 +302,7 @@ def web_data():
302
  padding: 15px;
303
  # border: 1px solid #949494; /* Grey border */
304
  border-radius: 12px;
305
- marging-bottom: 15px
306
  """, #https://colors.muz.li/palette/d3d3d3/949494/d3d3d3/d3d3d3/949494
307
  ),
308
  #DV2("data/sample_wet.json", "data/sample_warc.json", 3),
@@ -321,7 +321,7 @@ def web_data():
321
  background-color: #FAEAEA; /* Light pink background */
322
  padding: 15px;
323
  border-radius: 12px;
324
- marging-bottom: 15px
325
  """,
326
  ),
327
 
@@ -334,7 +334,7 @@ def web_data():
334
  background-color: #EAFFF1; /* Light green background */
335
  padding: 15px;
336
  border-radius: 12px;
337
- marging-bottom: 15px
338
  """,
339
  ),
340
 
@@ -357,7 +357,7 @@ def web_data():
357
  background-color: #FAEAEA; /* Light pink background */
358
  padding: 15px;
359
  border-radius: 12px;
360
- marging-bottom: 15px
361
  """,
362
  ),
363
 
@@ -371,7 +371,7 @@ def web_data():
371
  background-color: #FAEAEA; /* Light pink background */
372
  padding: 15px;
373
  border-radius: 12px;
374
- marging-bottom: 15px
375
  """,
376
  ),
377
 
@@ -386,7 +386,7 @@ def web_data():
386
  background-color: #FAEAEA; /* Light pink background */
387
  padding: 15px;
388
  border-radius: 12px;
389
- marging-bottom: 15px
390
  """,
391
  ),
392
 
@@ -405,7 +405,7 @@ def web_data():
405
  background-color: #FAEAEA; /* Light pink background */
406
  padding: 15px;
407
  border-radius: 12px;
408
- marging-bottom: 15px
409
  """,
410
  ),
411
 
@@ -416,7 +416,7 @@ def web_data():
416
  background-color: #EAFFF1; /* Light green background */
417
  padding: 15px;
418
  border-radius: 12px;
419
- marging-bottom: 15px
420
  """,
421
  ),
422
 
@@ -450,7 +450,7 @@ def web_data():
450
  background-color: #FAEAEA; /* Light pink background */
451
  padding: 15px;
452
  border-radius: 12px;
453
- marging-bottom: 15px
454
  """,
455
  ),
456
 
@@ -477,7 +477,7 @@ def web_data():
477
  background-color: #FAEAEA; /* Light pink background */
478
  padding: 15px;
479
  border-radius: 12px;
480
- marging-bottom: 15px
481
  """,
482
  ),
483
  H3("2.2 Other Rules from RefinedWeb"),
@@ -501,7 +501,7 @@ def web_data():
501
  background-color: #FAEAEA; /* Light pink background */
502
  padding: 15px;
503
  border-radius: 12px;
504
- marging-bottom: 15px
505
  """,
506
  ),
507
  H3("2.3 Toxic Lines"),
@@ -522,7 +522,7 @@ def web_data():
522
  background-color: #FAEAEA; /* Light pink background */
523
  padding: 15px;
524
  border-radius: 12px;
525
- marging-bottom: 15px
526
  """,
527
  ),
528
 
@@ -540,7 +540,7 @@ def web_data():
540
  background-color: #EAFFF1; /* Light green background */
541
  padding: 15px;
542
  border-radius: 12px;
543
- marging-bottom: 15px
544
  """,
545
  ),
546
  P("""Similar to previous sections, we will present sample documents filtered out by the given quality signals.
@@ -584,7 +584,7 @@ def web_data():
584
  background-color: #FFFAEA; /* Light yellow background */
585
  padding: 15px;
586
  border-radius: 12px;
587
- marging-bottom: 15px
588
  """,
589
  ),
590
  Details(
@@ -624,7 +624,7 @@ def web_data():
624
  background-color: #FFFAEA; /* Light yellow background */
625
  padding: 15px;
626
  border-radius: 12px;
627
- marging-bottom: 15px
628
  """,
629
  ),
630
  P("""
@@ -671,7 +671,7 @@ def web_data():
671
  background-color: #EAFFF1; /* Light green background */
672
  padding: 15px;
673
  border-radius: 12px;
674
- marging-bottom: 15px
675
  """,
676
  ),
677
  Details(
@@ -685,7 +685,7 @@ def web_data():
685
  background-color: #EAFFF1; /* Light green background */
686
  padding: 15px;
687
  border-radius: 12px;
688
- marging-bottom: 15px
689
  """,
690
  ),
691
  H3("3.1.2 Fraction of Characters in the Most Common N-grams (n=2,3,4)"),
@@ -714,7 +714,7 @@ def web_data():
714
  background-color: #FFFAEA; /* Light yellow background */
715
  padding: 15px;
716
  border-radius: 12px;
717
- marging-bottom: 15px
718
  """,
719
  ),
720
  Details(
@@ -758,7 +758,7 @@ def web_data():
758
  background-color: #FFFAEA; /* Light yellow background */
759
  padding: 15px;
760
  border-radius: 12px;
761
- marging-bottom: 15px
762
  """,
763
  ),
764
 
@@ -787,7 +787,7 @@ def web_data():
787
  background-color: #FFFAEA; /* Light yellow background */
788
  padding: 15px;
789
  border-radius: 12px;
790
- marging-bottom: 15px
791
  """,
792
  ),
793
  P("""
@@ -820,7 +820,7 @@ def web_data():
820
  background-color: #EAFFF1; /* Light green background */
821
  padding: 15px;
822
  border-radius: 12px;
823
- marging-bottom: 15px
824
  """,
825
  ),
826
  Details(
@@ -834,7 +834,7 @@ def web_data():
834
  background-color: #EAFFF1; /* Light green background */
835
  padding: 15px;
836
  border-radius: 12px;
837
- marging-bottom: 15px
838
  """,
839
  ),
840
  H3("3.1.3 Fraction of Characters in Duplicated N-grams (n=5,...,10)"),
@@ -866,7 +866,7 @@ def web_data():
866
  background-color: #FFFAEA; /* Light yellow background */
867
  padding: 15px;
868
  border-radius: 12px;
869
- marging-bottom: 15px
870
  """,
871
  ),
872
  Details(
@@ -925,7 +925,7 @@ def web_data():
925
  background-color: #FFFAEA; /* Light yellow background */
926
  padding: 15px;
927
  border-radius: 12px;
928
- marging-bottom: 15px
929
  """,
930
  ),
931
 
@@ -956,7 +956,7 @@ def web_data():
956
  background-color: #FFFAEA; /* Light yellow background */
957
  padding: 15px;
958
  border-radius: 12px;
959
- marging-bottom: 15px
960
  """,
961
  ),
962
  P("""
@@ -1015,7 +1015,7 @@ def web_data():
1015
  background-color: #EAFFF1; /* Light green background */
1016
  padding: 15px;
1017
  border-radius: 12px;
1018
- marging-bottom: 15px
1019
  """,
1020
  ),
1021
  Details(
@@ -1035,7 +1035,7 @@ def web_data():
1035
  background-color: #EAFFF1; /* Light green background */
1036
  padding: 15px;
1037
  border-radius: 12px;
1038
- marging-bottom: 15px
1039
  """,
1040
  ),
1041
  H5(
@@ -1052,7 +1052,7 @@ def web_data():
1052
  background-color: #EAFFF1; /* Light green background */
1053
  padding: 15px;
1054
  border-radius: 12px;
1055
- marging-bottom: 15px
1056
  """,
1057
  ),
1058
  H3("3.2 Line-wise Heuristics"),
@@ -1084,7 +1084,7 @@ def web_data():
1084
  background-color: #FFFAEA; /* Light yellow background */
1085
  padding: 15px;
1086
  border-radius: 12px;
1087
- marging-bottom: 15px
1088
  """,
1089
  ),
1090
  Details(
@@ -1134,7 +1134,7 @@ def web_data():
1134
  background-color: #FFFAEA; /* Light yellow background */
1135
  padding: 15px;
1136
  border-radius: 12px;
1137
- marging-bottom: 15px
1138
  """,
1139
  ),
1140
 
@@ -1150,7 +1150,7 @@ def web_data():
1150
  background-color: #EAFFF1; /* Light green background */
1151
  padding: 15px;
1152
  border-radius: 12px;
1153
- marging-bottom: 15px
1154
  """,
1155
  ),
1156
 
@@ -1216,7 +1216,7 @@ def web_data():
1216
  background-color: #FFFAEA; /* Light yellow background */
1217
  padding: 15px;
1218
  border-radius: 12px;
1219
- marging-bottom: 15px
1220
  """,
1221
  ),
1222
 
@@ -1233,7 +1233,7 @@ def web_data():
1233
  background-color: #FFFAEA; /* Light yellow background */
1234
  padding: 15px;
1235
  border-radius: 12px;
1236
- marging-bottom: 15px
1237
  """,
1238
  ),
1239
  P("""
@@ -1283,7 +1283,7 @@ def web_data():
1283
  background-color: #FFFAEA; /* Light yellow background */
1284
  padding: 15px;
1285
  border-radius: 12px;
1286
- marging-bottom: 15px
1287
  """,
1288
  ),
1289
  P("""
@@ -1305,7 +1305,7 @@ def web_data():
1305
  background-color: #EAFFF1; /* Light green background */
1306
  padding: 15px;
1307
  border-radius: 12px;
1308
- marging-bottom: 15px
1309
  """,
1310
  ),
1311
 
@@ -1327,7 +1327,7 @@ def web_data():
1327
  background-color: #FFFAEA; /* Light yellow background */
1328
  padding: 15px;
1329
  border-radius: 12px;
1330
- marging-bottom: 15px
1331
  """,
1332
  ),
1333
  Details(
@@ -1359,7 +1359,7 @@ def web_data():
1359
  background-color: #FFFAEA; /* Light yellow background */
1360
  padding: 15px;
1361
  border-radius: 12px;
1362
- marging-bottom: 15px
1363
  """,
1364
  ),
1365
 
@@ -1375,7 +1375,7 @@ def web_data():
1375
  background-color: #FFFAEA; /* Light yellow background */
1376
  padding: 15px;
1377
  border-radius: 12px;
1378
- marging-bottom: 15px
1379
  """,
1380
  ),
1381
  Details(
@@ -1391,7 +1391,7 @@ def web_data():
1391
  background-color: #EAFFF1; /* Light green background */
1392
  padding: 15px;
1393
  border-radius: 12px;
1394
- marging-bottom: 15px
1395
  """,
1396
  ),
1397
 
@@ -1407,7 +1407,7 @@ def web_data():
1407
  background-color: #FFFAEA; /* Light yellow background */
1408
  padding: 15px;
1409
  border-radius: 12px;
1410
- marging-bottom: 15px
1411
  """,
1412
  ),
1413
  Details(
@@ -1437,7 +1437,7 @@ def web_data():
1437
  background-color: #FFFAEA; /* Light yellow background */
1438
  padding: 15px;
1439
  border-radius: 12px;
1440
- marging-bottom: 15px
1441
  """,
1442
  ),
1443
  Details(
@@ -1454,7 +1454,7 @@ def web_data():
1454
  background-color: #FFFAEA; /* Light yellow background */
1455
  padding: 15px;
1456
  border-radius: 12px;
1457
- marging-bottom: 15px
1458
  """,
1459
  ),
1460
  P("""
@@ -1486,7 +1486,7 @@ def web_data():
1486
  background-color: #EAFFF1; /* Light green background */
1487
  padding: 15px;
1488
  border-radius: 12px;
1489
- marging-bottom: 15px
1490
  """,
1491
  ),
1492
  H3("3.4 Others"),
@@ -1502,7 +1502,7 @@ def web_data():
1502
  background-color: #FAEAEA; /* Light pink background */
1503
  padding: 15px;
1504
  border-radius: 12px;
1505
- marging-bottom: 15px
1506
  """,
1507
  ),
1508
  H2("4. Deduplication"),
 
240
  border: 1px solid #c3e6cb; /* Green border */
241
  border-radius: 5px;
242
  padding: 15px 15px 0px 15px;
243
+ margin-bottom: 15px
244
  """,
245
  ),
246
  H3("TxT360 CommonCrawl Filtering vs Other Pretraining Datasets"),
 
302
  padding: 15px;
303
  # border: 1px solid #949494; /* Grey border */
304
  border-radius: 12px;
305
+ margin-bottom: 15px
306
  """, #https://colors.muz.li/palette/d3d3d3/949494/d3d3d3/d3d3d3/949494
307
  ),
308
  #DV2("data/sample_wet.json", "data/sample_warc.json", 3),
 
321
  background-color: #FAEAEA; /* Light pink background */
322
  padding: 15px;
323
  border-radius: 12px;
324
+ margin-bottom: 15px
325
  """,
326
  ),
327
 
 
334
  background-color: #EAFFF1; /* Light green background */
335
  padding: 15px;
336
  border-radius: 12px;
337
+ margin-bottom: 15px
338
  """,
339
  ),
340
 
 
357
  background-color: #FAEAEA; /* Light pink background */
358
  padding: 15px;
359
  border-radius: 12px;
360
+ margin-bottom: 15px
361
  """,
362
  ),
363
 
 
371
  background-color: #FAEAEA; /* Light pink background */
372
  padding: 15px;
373
  border-radius: 12px;
374
+ margin-bottom: 15px
375
  """,
376
  ),
377
 
 
386
  background-color: #FAEAEA; /* Light pink background */
387
  padding: 15px;
388
  border-radius: 12px;
389
+ margin-bottom: 15px
390
  """,
391
  ),
392
 
 
405
  background-color: #FAEAEA; /* Light pink background */
406
  padding: 15px;
407
  border-radius: 12px;
408
+ margin-bottom: 15px
409
  """,
410
  ),
411
 
 
416
  background-color: #EAFFF1; /* Light green background */
417
  padding: 15px;
418
  border-radius: 12px;
419
+ margin-bottom: 15px
420
  """,
421
  ),
422
 
 
450
  background-color: #FAEAEA; /* Light pink background */
451
  padding: 15px;
452
  border-radius: 12px;
453
+ margin-bottom: 15px
454
  """,
455
  ),
456
 
 
477
  background-color: #FAEAEA; /* Light pink background */
478
  padding: 15px;
479
  border-radius: 12px;
480
+ margin-bottom: 15px
481
  """,
482
  ),
483
  H3("2.2 Other Rules from RefinedWeb"),
 
501
  background-color: #FAEAEA; /* Light pink background */
502
  padding: 15px;
503
  border-radius: 12px;
504
+ margin-bottom: 15px
505
  """,
506
  ),
507
  H3("2.3 Toxic Lines"),
 
522
  background-color: #FAEAEA; /* Light pink background */
523
  padding: 15px;
524
  border-radius: 12px;
525
+ margin-bottom: 15px
526
  """,
527
  ),
528
 
 
540
  background-color: #EAFFF1; /* Light green background */
541
  padding: 15px;
542
  border-radius: 12px;
543
+ margin-bottom: 15px
544
  """,
545
  ),
546
  P("""Similar to previous sections, we will present sample documents filtered out by the given quality signals.
 
584
  background-color: #FFFAEA; /* Light yellow background */
585
  padding: 15px;
586
  border-radius: 12px;
587
+ margin-bottom: 15px
588
  """,
589
  ),
590
  Details(
 
624
  background-color: #FFFAEA; /* Light yellow background */
625
  padding: 15px;
626
  border-radius: 12px;
627
+ margin-bottom: 15px
628
  """,
629
  ),
630
  P("""
 
671
  background-color: #EAFFF1; /* Light green background */
672
  padding: 15px;
673
  border-radius: 12px;
674
+ margin-bottom: 15px
675
  """,
676
  ),
677
  Details(
 
685
  background-color: #EAFFF1; /* Light green background */
686
  padding: 15px;
687
  border-radius: 12px;
688
+ margin-bottom: 15px
689
  """,
690
  ),
691
  H3("3.1.2 Fraction of Characters in the Most Common N-grams (n=2,3,4)"),
 
714
  background-color: #FFFAEA; /* Light yellow background */
715
  padding: 15px;
716
  border-radius: 12px;
717
+ margin-bottom: 15px
718
  """,
719
  ),
720
  Details(
 
758
  background-color: #FFFAEA; /* Light yellow background */
759
  padding: 15px;
760
  border-radius: 12px;
761
+ margin-bottom: 15px
762
  """,
763
  ),
764
 
 
787
  background-color: #FFFAEA; /* Light yellow background */
788
  padding: 15px;
789
  border-radius: 12px;
790
+ margin-bottom: 15px
791
  """,
792
  ),
793
  P("""
 
820
  background-color: #EAFFF1; /* Light green background */
821
  padding: 15px;
822
  border-radius: 12px;
823
+ margin-bottom: 15px
824
  """,
825
  ),
826
  Details(
 
834
  background-color: #EAFFF1; /* Light green background */
835
  padding: 15px;
836
  border-radius: 12px;
837
+ margin-bottom: 15px
838
  """,
839
  ),
840
  H3("3.1.3 Fraction of Characters in Duplicated N-grams (n=5,...,10)"),
 
866
  background-color: #FFFAEA; /* Light yellow background */
867
  padding: 15px;
868
  border-radius: 12px;
869
+ margin-bottom: 15px
870
  """,
871
  ),
872
  Details(
 
925
  background-color: #FFFAEA; /* Light yellow background */
926
  padding: 15px;
927
  border-radius: 12px;
928
+ margin-bottom: 15px
929
  """,
930
  ),
931
 
 
956
  background-color: #FFFAEA; /* Light yellow background */
957
  padding: 15px;
958
  border-radius: 12px;
959
+ margin-bottom: 15px
960
  """,
961
  ),
962
  P("""
 
1015
  background-color: #EAFFF1; /* Light green background */
1016
  padding: 15px;
1017
  border-radius: 12px;
1018
+ margin-bottom: 15px
1019
  """,
1020
  ),
1021
  Details(
 
1035
  background-color: #EAFFF1; /* Light green background */
1036
  padding: 15px;
1037
  border-radius: 12px;
1038
+ margin-bottom: 15px
1039
  """,
1040
  ),
1041
  H5(
 
1052
  background-color: #EAFFF1; /* Light green background */
1053
  padding: 15px;
1054
  border-radius: 12px;
1055
+ margin-bottom: 15px
1056
  """,
1057
  ),
1058
  H3("3.2 Line-wise Heuristics"),
 
1084
  background-color: #FFFAEA; /* Light yellow background */
1085
  padding: 15px;
1086
  border-radius: 12px;
1087
+ margin-bottom: 15px
1088
  """,
1089
  ),
1090
  Details(
 
1134
  background-color: #FFFAEA; /* Light yellow background */
1135
  padding: 15px;
1136
  border-radius: 12px;
1137
+ margin-bottom: 15px
1138
  """,
1139
  ),
1140
 
 
1150
  background-color: #EAFFF1; /* Light green background */
1151
  padding: 15px;
1152
  border-radius: 12px;
1153
+ margin-bottom: 15px
1154
  """,
1155
  ),
1156
 
 
1216
  background-color: #FFFAEA; /* Light yellow background */
1217
  padding: 15px;
1218
  border-radius: 12px;
1219
+ margin-bottom: 15px
1220
  """,
1221
  ),
1222
 
 
1233
  background-color: #FFFAEA; /* Light yellow background */
1234
  padding: 15px;
1235
  border-radius: 12px;
1236
+ margin-bottom: 15px
1237
  """,
1238
  ),
1239
  P("""
 
1283
  background-color: #FFFAEA; /* Light yellow background */
1284
  padding: 15px;
1285
  border-radius: 12px;
1286
+ margin-bottom: 15px
1287
  """,
1288
  ),
1289
  P("""
 
1305
  background-color: #EAFFF1; /* Light green background */
1306
  padding: 15px;
1307
  border-radius: 12px;
1308
+ margin-bottom: 15px
1309
  """,
1310
  ),
1311
 
 
1327
  background-color: #FFFAEA; /* Light yellow background */
1328
  padding: 15px;
1329
  border-radius: 12px;
1330
+ margin-bottom: 15px
1331
  """,
1332
  ),
1333
  Details(
 
1359
  background-color: #FFFAEA; /* Light yellow background */
1360
  padding: 15px;
1361
  border-radius: 12px;
1362
+ margin-bottom: 15px
1363
  """,
1364
  ),
1365
 
 
1375
  background-color: #FFFAEA; /* Light yellow background */
1376
  padding: 15px;
1377
  border-radius: 12px;
1378
+ margin-bottom: 15px
1379
  """,
1380
  ),
1381
  Details(
 
1391
  background-color: #EAFFF1; /* Light green background */
1392
  padding: 15px;
1393
  border-radius: 12px;
1394
+ margin-bottom: 15px
1395
  """,
1396
  ),
1397
 
 
1407
  background-color: #FFFAEA; /* Light yellow background */
1408
  padding: 15px;
1409
  border-radius: 12px;
1410
+ margin-bottom: 15px
1411
  """,
1412
  ),
1413
  Details(
 
1437
  background-color: #FFFAEA; /* Light yellow background */
1438
  padding: 15px;
1439
  border-radius: 12px;
1440
+ margin-bottom: 15px
1441
  """,
1442
  ),
1443
  Details(
 
1454
  background-color: #FFFAEA; /* Light yellow background */
1455
  padding: 15px;
1456
  border-radius: 12px;
1457
+ margin-bottom: 15px
1458
  """,
1459
  ),
1460
  P("""
 
1486
  background-color: #EAFFF1; /* Light green background */
1487
  padding: 15px;
1488
  border-radius: 12px;
1489
+ margin-bottom: 15px
1490
  """,
1491
  ),
1492
  H3("3.4 Others"),
 
1502
  background-color: #FAEAEA; /* Light pink background */
1503
  padding: 15px;
1504
  border-radius: 12px;
1505
+ margin-bottom: 15px
1506
  """,
1507
  ),
1508
  H2("4. Deduplication"),