Spaces:
Sleeping
Sleeping
Peter
committed on
Commit
•
904400a
1
Parent(s):
596d396
change to regex based split
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import logging
|
2 |
from pathlib import Path
|
3 |
import os
|
|
|
4 |
import gradio as gr
|
5 |
import nltk
|
6 |
import torch
|
@@ -29,7 +30,8 @@ def truncate_word_count(text, max_words=512):
|
|
29 |
-------
|
30 |
dict, the text and whether it was truncated
|
31 |
"""
|
32 |
-
|
|
|
33 |
processed = {}
|
34 |
if len(words) > max_words:
|
35 |
processed["was_truncated"] = True
|
|
|
1 |
import logging
|
2 |
from pathlib import Path
|
3 |
import os
|
4 |
+
import re
|
5 |
import gradio as gr
|
6 |
import nltk
|
7 |
import torch
|
|
|
30 |
-------
|
31 |
dict, the text and whether it was truncated
|
32 |
"""
|
33 |
+
# split on whitespace with regex
|
34 |
+
words = re.split(r"\s+", text)
|
35 |
processed = {}
|
36 |
if len(words) > max_words:
|
37 |
processed["was_truncated"] = True
|