victormiller
committed on
Commit
•
7af4892
1
Parent(s):
a89d144
Update overview.py
Browse files - overview.py +3 -3
overview.py
CHANGED
@@ -167,7 +167,7 @@ dataset_comparison2 = pd.DataFrame(
|
|
167 |
)
|
168 |
|
169 |
table_html2 = dataset_comparison2.to_html(index=False, border=0)
|
170 |
-
|
171 |
|
172 |
dataset_sources = pd.DataFrame(
|
173 |
{
|
@@ -279,8 +279,8 @@ both critical for effective LLM pre-training."""),
|
|
279 |
P("By integrating the extensive reach of web data with the exceptional quality of curated sources, TxT360 is crafted to meet and surpass the rigorous standards required for state-of-the-art LLM pre-training."),
|
280 |
H3("TxT360 combines both the web data and highly-curated sources, which none of the existing datasets have covered."),
|
281 |
P("Table 1: The following table shows TxT360 and other well-known datasets on the coverage and size of data sources."),
|
282 |
-
|
283 |
-
|
284 |
P("Table 2: Basic TxT360 Statistics."),
|
285 |
table_div1,
|
286 |
),
|
|
|
167 |
)
|
168 |
|
169 |
table_html2 = dataset_comparison2.to_html(index=False, border=0)
|
170 |
+
table_div_2 = Div(NotStr(table_html2), style="margin: 40px;")
|
171 |
|
172 |
dataset_sources = pd.DataFrame(
|
173 |
{
|
|
|
279 |
P("By integrating the extensive reach of web data with the exceptional quality of curated sources, TxT360 is crafted to meet and surpass the rigorous standards required for state-of-the-art LLM pre-training."),
|
280 |
H3("TxT360 combines both the web data and highly-curated sources, which none of the existing datasets have covered."),
|
281 |
P("Table 1: The following table shows TxT360 and other well-known datasets on the coverage and size of data sources."),
|
282 |
+
table_div_1,
|
283 |
+
table_div_2,
|
284 |
P("Table 2: Basic TxT360 Statistics."),
|
285 |
table_div1,
|
286 |
),
|