Spaces:
Running
Running
feat: add ftfy
Browse files
dalle_mini/text.py  +5 -1
dalle_mini/text.py
CHANGED
@@ -5,8 +5,9 @@ Utilities for processing text.
 import requests
 from pathlib import Path
 from unidecode import unidecode
-import re, math, random, html
 
+import re, math, random, html
+import ftfy
 
 WIKI_STATS_URL = "https://github.com/borisdayma/wikipedia-word-frequency/raw/feat-update/results/enwiki-20210820-words-frequency.txt"
 WIKI_STATS_LOCAL = Path(WIKI_STATS_URL).parts[-1]
@@ -220,6 +221,9 @@ class TextNormalizer:
         self._hashtag_processor = HashtagProcessor()
 
     def __call__(self, t, clip=False):
+
+        # fix some characters
+        t = ftfy.fix_text(t)
         # fix html
         t = fix_html(t)
         if not clip: