victormiller committed on
Commit
ff67812
1 Parent(s): 31dc904

Update curated.py

Browse files
Files changed (1) hide show
  1. curated.py +11 -10
curated.py CHANGED
@@ -673,16 +673,17 @@ filtering_process = Div(
673
  H4("Download and Extraction"),
674
  P("The dataset was downloaded from:", A("https://irclogs.ubuntu.com/{date.year}/{date.month:02d}/{date.day:02d}/", href="https://irclogs.ubuntu.com/{date.year}/{date.month:02d}/{date.day:02d}/"), " based on the year."),
675
  P("During extraction, the logs were cleaned using following functions:"),
676
- #D_code("""
677
- #def exclude_system(x):
678
- # return '\n'.join(line for line in x.split('\n') if not line.startswith('==='))
679
- #
680
- # def exclude_select_system(x):
681
- # return '\n'.join(line for line in x.split('\n') if not (line.startswith('===') and any(term in line for term in ['has joined #', 'has left #', 'Topic for #', "Topic (#", "is now known as"]) ))
682
- #
683
- # def clean(x):
684
- # return '\n'.join('* ' + line[4:] if line.startswith('===') else line[8:] for line in x.split('\n'))
685
- # """, block="block", language="python" ),
 
686
  H4("Filtering"),
687
  Ol(
688
  Li("Language Filter: English"),
 
673
  H4("Download and Extraction"),
674
  P("The dataset was downloaded from:", A("https://irclogs.ubuntu.com/{date.year}/{date.month:02d}/{date.day:02d}/", href="https://irclogs.ubuntu.com/{date.year}/{date.month:02d}/{date.day:02d}/"), " based on the year."),
675
  P("During extraction, the logs were cleaned using following functions:"),
676
+ D_code("""
677
+ def exclude_system(x):
678
+ return '\n'.join(line for line in x.split('\n') if not line.startswith('==='))
679
+
680
+ def exclude_select_system(x):
681
+ return '\n'.join(line for line in x.split('\n') if not (line.startswith('===')
682
+ and any(term in line for term in ['has joined #', 'has left #', 'Topic for #', "Topic (#", "is now known as"]) ))
683
+
684
+ def clean(x):
685
+ return '\n'.join('* ' + line[4:] if line.startswith('===') else line[8:] for line in x.split('\n'))
686
+ """, block="block", language="python" ),
687
  H4("Filtering"),
688
  Ol(
689
  Li("Language Filter: English"),