File size: 1,268 Bytes
876403c
 
 
 
eea4f3c
 
876403c
 
 
 
7d61140
876403c
 
 
a8dc9d8
876403c
 
 
 
 
 
7d61140
eea4f3c
94c4f42
876403c
 
a8dc9d8
 
876403c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import date_parser as dp
import preproc
import category_parser as cp
import sentiment_parser as sp
import word_transformations as wt


# def get_gender(tokens):
#     r = [token.feats['Gender'] for sent in tokenizing(text) for token in sent if (token.feats.get('Gender') and token.feats.get('Voice')) ]

# @st.experimental_memo
def analyze(text):
    """Run the diary-analysis pipeline on raw text and return the result.

    The text is handed to ``dp.date_extractor_for_diary`` and the outcome is
    passed through ``preproc.text_preproc``. The resulting table (presumably
    a pandas DataFrame with a ``'text'`` column -- confirm against those
    helpers) then gains these columns, in this order:

    * ``'tokens'``      -- ``preproc.tokenizing`` applied to ``'text'``
    * ``'locations'``   -- ``cp.get_facts(tokens, 'locations')``
    * ``'activities'``  -- ``cp.get_facts(tokens, 'activities')``
    * ``'sent'``        -- ``sp.get_overall_sentiment`` applied to ``'tokens'``
    * ``'sent_index'``  -- ``sp.get_sentiment_index`` applied to ``'sent'``

    Returns the table with the added columns.
    """
    # Split the text into dated chunks, then clean the diary text.
    entries = preproc.text_preproc(dp.date_extractor_for_diary(text))

    # Tokenize each diary entry by sentences.
    entries['tokens'] = entries['text'].apply(preproc.tokenizing)

    # Extract facts from the text; each category lands in a column named
    # after it. The category is forwarded as the second positional argument.
    for category in ('locations', 'activities'):
        entries[category] = entries['tokens'].apply(
            cp.get_facts, args=(category,)
        )

    # Determine the sentiment of each entry, then derive its index.
    entries['sent'] = entries['tokens'].apply(sp.get_overall_sentiment)
    entries['sent_index'] = entries['sent'].apply(sp.get_sentiment_index)

    return entries