victormiller committed on
Commit
8981902
1 Parent(s): 715785a

Update web.py

Browse files
Files changed (1) hide show
  1. web.py +4 -4
web.py CHANGED
@@ -392,10 +392,10 @@ def web_data():
392
  P("Note: All percentages are based on the number of documents. The gray bars represent the relative percentages of removed documents at each step, while the colorful bars represent the percentages of retained documents relative to the total number of documents in the raw Common Crawl."),
393
  P("We also adopt rules from RefinedWeb [1] to remove lines if they satisfy any of the following criteria:"),
394
  Ul(
395
- Li("the line is only composed of uppercase characters"),
396
- Li("the line is only composed of numerical characters"),
397
- Li("the line matches the pattern “r'^\d+\s+likes$"),
398
- Li("the line only contains one word."),
399
  ),
400
  P("We summarize other statistics-based rules originated from Gopher [7] in this section. The statistics can be used include:"),
401
  Ul(
 
392
  P("Note: All percentages are based on the number of documents. The gray bars represent the relative percentages of removed documents at each step, while the colorful bars represent the percentages of retained documents relative to the total number of documents in the raw Common Crawl."),
393
  P("We also adopt rules from RefinedWeb [1] to remove lines if they satisfy any of the following criteria:"),
394
  Ul(
395
+ Li("the line is only composed of uppercase characters", style = "margin-bottom: 5px"),
396
+ Li("the line is only composed of numerical characters", style = "margin-bottom: 5px"),
397
+ Li("the line matches the pattern “r'^\d+\s+likes$", style = "margin-bottom: 5px"),
398
+ Li("the line only contains one word.", style = "margin-bottom: 5px"),
399
  ),
400
  P("We summarize other statistics-based rules originated from Gopher [7] in this section. The statistics can be used include:"),
401
  Ul(