File size: 195 Bytes
1366553
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
import docx2txt
import re
import string
def split_string(path):
    """Return the words of a document with all ASCII punctuation removed.

    The text is obtained from ``docx2txt.process(path)``. Every character
    listed in ``string.punctuation`` is deleted (not replaced by a space, so
    "don't" becomes "dont"), and the remaining text is split on runs of
    whitespace.

    Args:
        path: Location of the document; passed unchanged to
            ``docx2txt.process``.

    Returns:
        A list of word strings, in document order. The list is empty when
        nothing but punctuation and whitespace is present.
    """
    doc = docx2txt.process(path)
    # Use a translation table so each punctuation character is removed
    # literally. Building a regex class as '[' + string.punctuation + ']'
    # does not do that: the "\]" inside string.punctuation is parsed as an
    # escaped "]", so backslashes were silently left in the output.
    strip_punctuation = str.maketrans('', '', string.punctuation)
    return doc.translate(strip_punctuation).split()