victormiller
committed on
Commit
•
3a168e8
1
Parent(s):
d29b7f7
Update web.py
Browse files
web.py
CHANGED
@@ -366,13 +366,13 @@ def web_data():
|
|
366 |
|
367 |
|
368 |
Details(
|
369 |
-
Summary("Non-English
|
370 |
Div(
|
371 |
DV("data/sample_non_en.json", 3, "Sample documents that are classified as non-English"),
|
372 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
373 |
),
|
374 |
style="""
|
375 |
-
background-color: #
|
376 |
padding: 15px;
|
377 |
border-radius: 12px;
|
378 |
margin-bottom: 15px
|
@@ -382,13 +382,13 @@ def web_data():
|
|
382 |
#DV("data/sample_non_en.json", 3, "Sample documents that are classified as non-English"),
|
383 |
|
384 |
Details(
|
385 |
-
Summary("English Documents Scoring Lower than 0.65"),
|
386 |
Div(
|
387 |
DV("data/sample_en_low.json", 3, "Sample documents that are classified as English but with score less than 0.65"),
|
388 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
389 |
),
|
390 |
style="""
|
391 |
-
background-color: #
|
392 |
padding: 15px;
|
393 |
border-radius: 12px;
|
394 |
margin-bottom: 15px
|
@@ -483,7 +483,7 @@ def web_data():
|
|
483 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
484 |
),
|
485 |
style="""
|
486 |
-
background-color: #
|
487 |
padding: 15px;
|
488 |
border-radius: 12px;
|
489 |
margin-bottom: 15px
|
@@ -510,7 +510,7 @@ def web_data():
|
|
510 |
"""),
|
511 |
|
512 |
Details(
|
513 |
-
Summary("
|
514 |
Div (
|
515 |
DV(
|
516 |
"data/sample_terminal_punc.json",
|
@@ -520,7 +520,7 @@ def web_data():
|
|
520 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
521 |
),
|
522 |
style="""
|
523 |
-
background-color: #
|
524 |
padding: 15px;
|
525 |
border-radius: 12px;
|
526 |
margin-bottom: 15px
|
@@ -539,7 +539,7 @@ def web_data():
|
|
539 |
The additional keyword could be any one of “enable” / “disable” / “require” / “activate” / “browser”.
|
540 |
"""),
|
541 |
Details(
|
542 |
-
Summary("
|
543 |
Div (
|
544 |
DV(
|
545 |
"data/sample_java.jsonl",
|
@@ -549,7 +549,7 @@ def web_data():
|
|
549 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
550 |
),
|
551 |
style="""
|
552 |
-
background-color: #
|
553 |
padding: 15px;
|
554 |
border-radius: 12px;
|
555 |
margin-bottom: 15px
|
@@ -565,7 +565,7 @@ def web_data():
|
|
565 |
Li("the line only contains one word.", style = "margin-bottom: 5px"),
|
566 |
),
|
567 |
Details(
|
568 |
-
Summary("
|
569 |
Div (
|
570 |
DV(
|
571 |
"data/sample_refinedweb_line.json",
|
@@ -575,7 +575,7 @@ def web_data():
|
|
575 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
576 |
),
|
577 |
style="""
|
578 |
-
background-color: #
|
579 |
padding: 15px;
|
580 |
border-radius: 12px;
|
581 |
margin-bottom: 15px
|
@@ -665,7 +665,7 @@ def web_data():
|
|
665 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
666 |
),
|
667 |
style="""
|
668 |
-
background-color: #
|
669 |
padding: 15px;
|
670 |
border-radius: 12px;
|
671 |
margin-bottom: 15px
|
@@ -708,7 +708,7 @@ def web_data():
|
|
708 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
709 |
),
|
710 |
style="""
|
711 |
-
background-color: #
|
712 |
padding: 15px;
|
713 |
border-radius: 12px;
|
714 |
margin-bottom: 15px
|
@@ -762,7 +762,7 @@ def web_data():
|
|
762 |
""",
|
763 |
),
|
764 |
Details(
|
765 |
-
Summary("
|
766 |
Div(
|
767 |
DV(
|
768 |
"data/repeat_line_frac.jsonl",
|
@@ -772,7 +772,7 @@ def web_data():
|
|
772 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
773 |
),
|
774 |
style="""
|
775 |
-
background-color: #
|
776 |
padding: 15px;
|
777 |
border-radius: 12px;
|
778 |
margin-bottom: 15px
|
@@ -803,7 +803,7 @@ def web_data():
|
|
803 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
804 |
),
|
805 |
style="""
|
806 |
-
background-color: #
|
807 |
padding: 15px;
|
808 |
border-radius: 12px;
|
809 |
margin-bottom: 15px
|
@@ -850,7 +850,7 @@ def web_data():
|
|
850 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
851 |
),
|
852 |
style="""
|
853 |
-
background-color: #
|
854 |
padding: 15px;
|
855 |
border-radius: 12px;
|
856 |
margin-bottom: 15px
|
@@ -882,7 +882,7 @@ def web_data():
|
|
882 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
883 |
),
|
884 |
style="""
|
885 |
-
background-color: #
|
886 |
padding: 15px;
|
887 |
border-radius: 12px;
|
888 |
margin-bottom: 15px
|
@@ -925,7 +925,7 @@ def web_data():
|
|
925 |
""",
|
926 |
),
|
927 |
Details(
|
928 |
-
Summary("
|
929 |
Div(
|
930 |
DV(
|
931 |
"data/sample_top_ngram.json",
|
@@ -935,7 +935,7 @@ def web_data():
|
|
935 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
936 |
),
|
937 |
style="""
|
938 |
-
background-color: #
|
939 |
padding: 15px;
|
940 |
border-radius: 12px;
|
941 |
margin-bottom: 15px
|
@@ -969,7 +969,7 @@ def web_data():
|
|
969 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
970 |
),
|
971 |
style="""
|
972 |
-
background-color: #
|
973 |
padding: 15px;
|
974 |
border-radius: 12px;
|
975 |
margin-bottom: 15px
|
@@ -1031,7 +1031,7 @@ def web_data():
|
|
1031 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1032 |
),
|
1033 |
style="""
|
1034 |
-
background-color: #
|
1035 |
padding: 15px;
|
1036 |
border-radius: 12px;
|
1037 |
margin-bottom: 15px
|
@@ -1065,7 +1065,7 @@ def web_data():
|
|
1065 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1066 |
),
|
1067 |
style="""
|
1068 |
-
background-color: #
|
1069 |
padding: 15px;
|
1070 |
border-radius: 12px;
|
1071 |
margin-bottom: 15px
|
@@ -1134,7 +1134,7 @@ def web_data():
|
|
1134 |
""",
|
1135 |
),
|
1136 |
Details(
|
1137 |
-
Summary("
|
1138 |
P("""
|
1139 |
Considering n = 5 and the sample sentence:
|
1140 |
|
@@ -1157,7 +1157,7 @@ def web_data():
|
|
1157 |
"Sample Documents Filtered by the Fraction of Characters in Duplicated N-grams (n=5,...,10)"
|
1158 |
),
|
1159 |
Details(
|
1160 |
-
Summary("
|
1161 |
Div(
|
1162 |
DV(
|
1163 |
"data/sample_dup_ngram.json",
|
@@ -1167,7 +1167,7 @@ def web_data():
|
|
1167 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1168 |
),
|
1169 |
style="""
|
1170 |
-
background-color: #
|
1171 |
padding: 15px;
|
1172 |
border-radius: 12px;
|
1173 |
margin-bottom: 15px
|
@@ -1201,7 +1201,7 @@ def web_data():
|
|
1201 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1202 |
),
|
1203 |
style="""
|
1204 |
-
background-color: #
|
1205 |
padding: 15px;
|
1206 |
border-radius: 12px;
|
1207 |
margin-bottom: 15px
|
@@ -1254,7 +1254,7 @@ def web_data():
|
|
1254 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1255 |
),
|
1256 |
style="""
|
1257 |
-
background-color: #
|
1258 |
padding: 15px;
|
1259 |
border-radius: 12px;
|
1260 |
margin-bottom: 15px
|
@@ -1263,7 +1263,7 @@ def web_data():
|
|
1263 |
|
1264 |
|
1265 |
Details(
|
1266 |
-
Summary("
|
1267 |
Div(
|
1268 |
DV(
|
1269 |
"data/line_info.json",
|
@@ -1273,7 +1273,7 @@ def web_data():
|
|
1273 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1274 |
),
|
1275 |
style="""
|
1276 |
-
background-color: #
|
1277 |
padding: 15px;
|
1278 |
border-radius: 12px;
|
1279 |
margin-bottom: 15px
|
@@ -1343,7 +1343,7 @@ def web_data():
|
|
1343 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1344 |
),
|
1345 |
style="""
|
1346 |
-
background-color: #
|
1347 |
padding: 15px;
|
1348 |
border-radius: 12px;
|
1349 |
margin-bottom: 15px
|
@@ -1363,7 +1363,7 @@ def web_data():
|
|
1363 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1364 |
),
|
1365 |
style="""
|
1366 |
-
background-color: #
|
1367 |
padding: 15px;
|
1368 |
border-radius: 12px;
|
1369 |
margin-bottom: 15px
|
@@ -1414,7 +1414,7 @@ def web_data():
|
|
1414 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1415 |
),
|
1416 |
style="""
|
1417 |
-
background-color: #
|
1418 |
padding: 15px;
|
1419 |
border-radius: 12px;
|
1420 |
margin-bottom: 15px
|
@@ -1463,7 +1463,7 @@ def web_data():
|
|
1463 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1464 |
),
|
1465 |
style="""
|
1466 |
-
background-color: #
|
1467 |
padding: 15px;
|
1468 |
border-radius: 12px;
|
1469 |
margin-bottom: 15px
|
@@ -1498,7 +1498,7 @@ def web_data():
|
|
1498 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1499 |
),
|
1500 |
style="""
|
1501 |
-
background-color: #
|
1502 |
padding: 15px;
|
1503 |
border-radius: 12px;
|
1504 |
margin-bottom: 15px
|
@@ -1517,7 +1517,7 @@ def web_data():
|
|
1517 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1518 |
),
|
1519 |
style="""
|
1520 |
-
background-color: #
|
1521 |
padding: 15px;
|
1522 |
border-radius: 12px;
|
1523 |
margin-bottom: 15px
|
@@ -1555,7 +1555,7 @@ def web_data():
|
|
1555 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1556 |
),
|
1557 |
style="""
|
1558 |
-
background-color: #
|
1559 |
padding: 15px;
|
1560 |
border-radius: 12px;
|
1561 |
margin-bottom: 15px
|
@@ -1588,7 +1588,7 @@ def web_data():
|
|
1588 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1589 |
),
|
1590 |
style="""
|
1591 |
-
background-color: #
|
1592 |
padding: 15px;
|
1593 |
border-radius: 12px;
|
1594 |
margin-bottom: 15px
|
@@ -1608,7 +1608,7 @@ def web_data():
|
|
1608 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1609 |
),
|
1610 |
style="""
|
1611 |
-
background-color: #
|
1612 |
padding: 15px;
|
1613 |
border-radius: 12px;
|
1614 |
margin-bottom: 15px
|
@@ -1632,7 +1632,7 @@ def web_data():
|
|
1632 |
""", block="block", language="python"),
|
1633 |
H3("TxT360 Implementation"),
|
1634 |
Details(
|
1635 |
-
Summary("
|
1636 |
Div(
|
1637 |
DV(
|
1638 |
"data/sample_doc_stat.json",
|
@@ -1642,7 +1642,7 @@ def web_data():
|
|
1642 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1643 |
),
|
1644 |
style="""
|
1645 |
-
background-color: #
|
1646 |
padding: 15px;
|
1647 |
border-radius: 12px;
|
1648 |
margin-bottom: 15px
|
@@ -1654,13 +1654,13 @@ def web_data():
|
|
1654 |
"""),
|
1655 |
|
1656 |
Details(
|
1657 |
-
Summary("
|
1658 |
Div(
|
1659 |
DV("data/lorem_ipsum.json", 0, "Sample documents containing 'lorem ipsum'"),
|
1660 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1661 |
),
|
1662 |
style="""
|
1663 |
-
background-color: #
|
1664 |
padding: 15px;
|
1665 |
border-radius: 12px;
|
1666 |
margin-bottom: 15px
|
|
|
366 |
|
367 |
|
368 |
Details(
|
369 |
+
Summary("Non-English Document Examples"),
|
370 |
Div(
|
371 |
DV("data/sample_non_en.json", 3, "Sample documents that are classified as non-English"),
|
372 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
373 |
),
|
374 |
style="""
|
375 |
+
background-color: #F0F8FF; /* Light pink background */
|
376 |
padding: 15px;
|
377 |
border-radius: 12px;
|
378 |
margin-bottom: 15px
|
|
|
382 |
#DV("data/sample_non_en.json", 3, "Sample documents that are classified as non-English"),
|
383 |
|
384 |
Details(
|
385 |
+
Summary("English Documents Scoring Lower than 0.65 Examples"),
|
386 |
Div(
|
387 |
DV("data/sample_en_low.json", 3, "Sample documents that are classified as English but with score less than 0.65"),
|
388 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
389 |
),
|
390 |
style="""
|
391 |
+
background-color: #F0F8FF; /* Light green background */
|
392 |
padding: 15px;
|
393 |
border-radius: 12px;
|
394 |
margin-bottom: 15px
|
|
|
483 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
484 |
),
|
485 |
style="""
|
486 |
+
background-color: #F0F8FF; /* Light green background */
|
487 |
padding: 15px;
|
488 |
border-radius: 12px;
|
489 |
margin-bottom: 15px
|
|
|
510 |
"""),
|
511 |
|
512 |
Details(
|
513 |
+
Summary("Terminal Punctuation Filtering Examples"),
|
514 |
Div (
|
515 |
DV(
|
516 |
"data/sample_terminal_punc.json",
|
|
|
520 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
521 |
),
|
522 |
style="""
|
523 |
+
background-color: #F0F8FF; /* Light pink background */
|
524 |
padding: 15px;
|
525 |
border-radius: 12px;
|
526 |
margin-bottom: 15px
|
|
|
539 |
The additional keyword could be any one of “enable” / “disable” / “require” / “activate” / “browser”.
|
540 |
"""),
|
541 |
Details(
|
542 |
+
Summary("Javascript Examples Filtered by C4 but Kept in TxT360"),
|
543 |
Div (
|
544 |
DV(
|
545 |
"data/sample_java.jsonl",
|
|
|
549 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
550 |
),
|
551 |
style="""
|
552 |
+
background-color: #F0F8FF; /* Light pink background */
|
553 |
padding: 15px;
|
554 |
border-radius: 12px;
|
555 |
margin-bottom: 15px
|
|
|
565 |
Li("the line only contains one word.", style = "margin-bottom: 5px"),
|
566 |
),
|
567 |
Details(
|
568 |
+
Summary("Documents Filtered using RefinedWeb Rules."),
|
569 |
Div (
|
570 |
DV(
|
571 |
"data/sample_refinedweb_line.json",
|
|
|
575 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
576 |
),
|
577 |
style="""
|
578 |
+
background-color: #F0F8FF; /* Light pink background */
|
579 |
padding: 15px;
|
580 |
border-radius: 12px;
|
581 |
margin-bottom: 15px
|
|
|
665 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
666 |
),
|
667 |
style="""
|
668 |
+
background-color: #EAFFF1; /* Light yellow background */
|
669 |
padding: 15px;
|
670 |
border-radius: 12px;
|
671 |
margin-bottom: 15px
|
|
|
708 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
709 |
),
|
710 |
style="""
|
711 |
+
background-color: #EAFFF1; /* Light yellow background */
|
712 |
padding: 15px;
|
713 |
border-radius: 12px;
|
714 |
margin-bottom: 15px
|
|
|
762 |
""",
|
763 |
),
|
764 |
Details(
|
765 |
+
Summary("Excessive Line and Character Repetition Filtered Examples"),
|
766 |
Div(
|
767 |
DV(
|
768 |
"data/repeat_line_frac.jsonl",
|
|
|
772 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
773 |
),
|
774 |
style="""
|
775 |
+
background-color: #F0F8FF; /* Light pink background */
|
776 |
padding: 15px;
|
777 |
border-radius: 12px;
|
778 |
margin-bottom: 15px
|
|
|
803 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
804 |
),
|
805 |
style="""
|
806 |
+
background-color: #EAFFF1; /* Light yellow background */
|
807 |
padding: 15px;
|
808 |
border-radius: 12px;
|
809 |
margin-bottom: 15px
|
|
|
850 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
851 |
),
|
852 |
style="""
|
853 |
+
background-color: #EAFFF1; /* Light yellow background */
|
854 |
padding: 15px;
|
855 |
border-radius: 12px;
|
856 |
margin-bottom: 15px
|
|
|
882 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
883 |
),
|
884 |
style="""
|
885 |
+
background-color: #EAFFF1; /* Light yellow background */
|
886 |
padding: 15px;
|
887 |
border-radius: 12px;
|
888 |
margin-bottom: 15px
|
|
|
925 |
""",
|
926 |
),
|
927 |
Details(
|
928 |
+
Summary("Documents Filtered Using Most Common n-Grams (n=2,3,4)"),
|
929 |
Div(
|
930 |
DV(
|
931 |
"data/sample_top_ngram.json",
|
|
|
935 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
936 |
),
|
937 |
style="""
|
938 |
+
background-color: #F0F8FF; /* Light pink background */
|
939 |
padding: 15px;
|
940 |
border-radius: 12px;
|
941 |
margin-bottom: 15px
|
|
|
969 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
970 |
),
|
971 |
style="""
|
972 |
+
background-color: #EAFFF1; /* Light yellow background */
|
973 |
padding: 15px;
|
974 |
border-radius: 12px;
|
975 |
margin-bottom: 15px
|
|
|
1031 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1032 |
),
|
1033 |
style="""
|
1034 |
+
background-color: #EAFFF1; /* Light yellow background */
|
1035 |
padding: 15px;
|
1036 |
border-radius: 12px;
|
1037 |
margin-bottom: 15px
|
|
|
1065 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1066 |
),
|
1067 |
style="""
|
1068 |
+
background-color: #EAFFF1; /* Light yellow background */
|
1069 |
padding: 15px;
|
1070 |
border-radius: 12px;
|
1071 |
margin-bottom: 15px
|
|
|
1134 |
""",
|
1135 |
),
|
1136 |
Details(
|
1137 |
+
Summary("Comparison of Coding Implementations"),
|
1138 |
P("""
|
1139 |
Considering n = 5 and the sample sentence:
|
1140 |
|
|
|
1157 |
"Sample Documents Filtered by the Fraction of Characters in Duplicated N-grams (n=5,...,10)"
|
1158 |
),
|
1159 |
Details(
|
1160 |
+
Summary("Documents Filtered by Duplicated n-Grams (n=5,...,10)"),
|
1161 |
Div(
|
1162 |
DV(
|
1163 |
"data/sample_dup_ngram.json",
|
|
|
1167 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1168 |
),
|
1169 |
style="""
|
1170 |
+
background-color: #F0F8FF; /* Light pink background */
|
1171 |
padding: 15px;
|
1172 |
border-radius: 12px;
|
1173 |
margin-bottom: 15px
|
|
|
1201 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1202 |
),
|
1203 |
style="""
|
1204 |
+
background-color: #EAFFF1; /* Light yellow background */
|
1205 |
padding: 15px;
|
1206 |
border-radius: 12px;
|
1207 |
margin-bottom: 15px
|
|
|
1254 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1255 |
),
|
1256 |
style="""
|
1257 |
+
background-color: #EAFFF1; /* Light green background (the previous light yellow was #FFFAEA) */
|
1258 |
padding: 15px;
|
1259 |
border-radius: 12px;
|
1260 |
margin-bottom: 15px
|
|
|
1263 |
|
1264 |
|
1265 |
Details(
|
1266 |
+
Summary("Documents Filtered by Line-Wise Heuristics"),
|
1267 |
Div(
|
1268 |
DV(
|
1269 |
"data/line_info.json",
|
|
|
1273 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1274 |
),
|
1275 |
style="""
|
1276 |
+
background-color: #F0F8FF; /* Light pink background */
|
1277 |
padding: 15px;
|
1278 |
border-radius: 12px;
|
1279 |
margin-bottom: 15px
|
|
|
1343 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1344 |
),
|
1345 |
style="""
|
1346 |
+
background-color: #EAFFF1; /* Light yellow background */
|
1347 |
padding: 15px;
|
1348 |
border-radius: 12px;
|
1349 |
margin-bottom: 15px
|
|
|
1363 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1364 |
),
|
1365 |
style="""
|
1366 |
+
background-color: #EAFFF1; /* Light yellow background */
|
1367 |
padding: 15px;
|
1368 |
border-radius: 12px;
|
1369 |
margin-bottom: 15px
|
|
|
1414 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1415 |
),
|
1416 |
style="""
|
1417 |
+
background-color: #EAFFF1; /* Light yellow background */
|
1418 |
padding: 15px;
|
1419 |
border-radius: 12px;
|
1420 |
margin-bottom: 15px
|
|
|
1463 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1464 |
),
|
1465 |
style="""
|
1466 |
+
background-color: #EAFFF1; /* Light yellow background */
|
1467 |
padding: 15px;
|
1468 |
border-radius: 12px;
|
1469 |
margin-bottom: 15px
|
|
|
1498 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1499 |
),
|
1500 |
style="""
|
1501 |
+
background-color: #EAFFF1; /* Light yellow background */
|
1502 |
padding: 15px;
|
1503 |
border-radius: 12px;
|
1504 |
margin-bottom: 15px
|
|
|
1517 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1518 |
),
|
1519 |
style="""
|
1520 |
+
background-color: #EAFFF1; /* Light yellow background */
|
1521 |
padding: 15px;
|
1522 |
border-radius: 12px;
|
1523 |
margin-bottom: 15px
|
|
|
1555 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1556 |
),
|
1557 |
style="""
|
1558 |
+
background-color: #EAFFF1; /* Light yellow background */
|
1559 |
padding: 15px;
|
1560 |
border-radius: 12px;
|
1561 |
margin-bottom: 15px
|
|
|
1588 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1589 |
),
|
1590 |
style="""
|
1591 |
+
background-color: #EAFFF1; /* Light yellow background */
|
1592 |
padding: 15px;
|
1593 |
border-radius: 12px;
|
1594 |
margin-bottom: 15px
|
|
|
1608 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1609 |
),
|
1610 |
style="""
|
1611 |
+
background-color: #EAFFF1; /* Light yellow background */
|
1612 |
padding: 15px;
|
1613 |
border-radius: 12px;
|
1614 |
margin-bottom: 15px
|
|
|
1632 |
""", block="block", language="python"),
|
1633 |
H3("TxT360 Implementation"),
|
1634 |
Details(
|
1635 |
+
Summary("Documents Filtered by Statistics-Based Heuristics"),
|
1636 |
Div(
|
1637 |
DV(
|
1638 |
"data/sample_doc_stat.json",
|
|
|
1642 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1643 |
),
|
1644 |
style="""
|
1645 |
+
background-color: #F0F8FF; /* Light pink background */
|
1646 |
padding: 15px;
|
1647 |
border-radius: 12px;
|
1648 |
margin-bottom: 15px
|
|
|
1654 |
"""),
|
1655 |
|
1656 |
Details(
|
1657 |
+
Summary("Documents Containing 'lorem ipsum'"),
|
1658 |
Div(
|
1659 |
DV("data/lorem_ipsum.json", 0, "Sample documents containing 'lorem ipsum'"),
|
1660 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
1661 |
),
|
1662 |
style="""
|
1663 |
+
background-color: #F0F8FF; /* Light pink background */
|
1664 |
padding: 15px;
|
1665 |
border-radius: 12px;
|
1666 |
margin-bottom: 15px
|