Spaces:
Runtime error
Runtime error
victormiller
committed on
Commit
•
ff67812
1
Parent(s):
31dc904
Update curated.py
Browse files- curated.py +11 -10
curated.py
CHANGED
@@ -673,16 +673,17 @@ filtering_process = Div(
|
|
673 |
H4("Download and Extraction"),
|
674 |
P("The dataset was downloaded from:", A("https://irclogs.ubuntu.com/{date.year}/{date.month:02d}/{date.day:02d}/", href="https://irclogs.ubuntu.com/{date.year}/{date.month:02d}/{date.day:02d}/"), " based on the year."),
|
675 |
P("During extraction, the logs were cleaned using following functions:"),
|
676 |
-
|
677 |
-
|
678 |
-
|
679 |
-
|
680 |
-
|
681 |
-
|
682 |
-
#
|
683 |
-
|
684 |
-
|
685 |
-
|
|
|
686 |
H4("Filtering"),
|
687 |
Ol(
|
688 |
Li("Language Filter: English"),
|
|
|
673 |
H4("Download and Extraction"),
|
674 |
P("The dataset was downloaded from:", A("https://irclogs.ubuntu.com/{date.year}/{date.month:02d}/{date.day:02d}/", href="https://irclogs.ubuntu.com/{date.year}/{date.month:02d}/{date.day:02d}/"), " based on the year."),
|
675 |
P("During extraction, the logs were cleaned using following functions:"),
|
676 |
+
D_code("""
|
677 |
+
def exclude_system(x):
|
678 |
+
return '\n'.join(line for line in x.split('\n') if not line.startswith('==='))
|
679 |
+
|
680 |
+
def exclude_select_system(x):
|
681 |
+
return '\n'.join(line for line in x.split('\n') if not (line.startswith('===')
|
682 |
+
and any(term in line for term in ['has joined #', 'has left #', 'Topic for #', "Topic (#", "is now known as"]) ))
|
683 |
+
|
684 |
+
def clean(x):
|
685 |
+
return '\n'.join('* ' + line[4:] if line.startswith('===') else line[8:] for line in x.split('\n'))
|
686 |
+
""", block="block", language="python" ),
|
687 |
H4("Filtering"),
|
688 |
Ol(
|
689 |
Li("Language Filter: English"),
|