Update README.md
Browse files
README.md
CHANGED
@@ -30,6 +30,17 @@ In order to use 'palobert-base-greek-social-media', the text needs to be pre-pro
|
|
30 |
* convert to lowercase
|
31 |
* remove all punctuation
|
32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
## Load Model
|
34 |
|
35 |
```python
|
|
|
30 |
* convert to lowercase
|
31 |
* remove all punctuation
|
32 |
|
33 |
+
```python
|
34 |
+
import re
|
35 |
+
import unicodedata
|
36 |
+
|
37 |
+
def preprocess(text: str, default_replace: str = "") -> str:
    """Normalize text: lowercase, strip diacritics, remove punctuation.

    Args:
        text: The raw input string.
        default_replace: String substituted for every punctuation
            character (anything that is neither a word character nor
            whitespace). Defaults to the empty string, i.e. deletion.

    Returns:
        The lowercased text with combining diacritical marks removed and
        punctuation replaced by ``default_replace``.
    """
    text = text.lower()
    # Decompose accented letters into base letter + combining marks, then
    # drop every combining mark (acute accent, dialytika, ...). This must
    # happen before the punctuation pass: `\w` in `re` does not match
    # combining marks, so any mark left behind would be treated as
    # punctuation and swapped for `default_replace`, splitting words
    # whenever the replacement is non-empty.
    decomposed = unicodedata.normalize("NFD", text)
    text = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    # Replace everything that is neither a word character nor whitespace.
    text = re.sub(r"[^\w\s]", default_replace, text)
    return text
|
42 |
+
```
|
43 |
+
|
44 |
## Load Model
|
45 |
|
46 |
```python
|