File size: 5,840 Bytes
7d61140
eea4f3c
 
a8dc9d8
 
7d61140
 
 
eea4f3c
7d61140
 
 
 
 
 
 
 
eea4f3c
 
94c4f42
7d61140
 
 
 
94c4f42
 
 
 
 
 
 
a8dc9d8
 
 
 
eea4f3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94c4f42
eea4f3c
 
 
94c4f42
 
 
 
 
 
 
eea4f3c
 
 
 
 
 
 
94c4f42
eea4f3c
 
 
 
 
 
 
 
 
 
a8dc9d8
 
 
 
 
 
eea4f3c
a8dc9d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eea4f3c
 
 
a8dc9d8
94c4f42
eea4f3c
a8dc9d8
 
94c4f42
 
 
 
eea4f3c
94c4f42
eea4f3c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import networkx as nx
from collections import Counter
import word_transformations as wt
import sentiment_parser as sp
# import app


def build_graph(df):
    """Build the directed diary graph from a table of entries.

    Node groups written to the graph, each with a fixed display colour:
    ``Date`` (blue), ``Location`` (green), ``Fact`` (red), ``Activity``
    (purple) and ``Sentiment`` (pink).

    Edges:
      * date -> date of the following row (a chain in row order;
        presumably rows are chronological — TODO confirm with the caller);
      * date -> fact and location -> fact for every ``locations`` item;
      * date -> activity for every ``activities`` item;
      * date -> the label returned by ``sp.get_most_sentiment``.

    Args:
        df: table indexed like a pandas DataFrame, with the columns
            ``date_start``, ``locations``, ``activities`` and
            ``sent_index``. Each ``locations`` item is read as
            ``(location, fact)``; only element 0 of each ``activities``
            item is used.

    Returns:
        The populated ``networkx.DiGraph``.
    """
    G = nx.DiGraph()

    # Tag every date before chaining them. Doing the tagging only inside the
    # pairwise loop left a single-entry diary with an untagged date node,
    # because add_edge() creates missing nodes without attributes.
    dates = df['date_start']
    for date in dates:
        G.add_node(date, group="Date", color="blue")
    for previous, current in zip(dates, dates[1:]):
        G.add_edge(previous, current)

    # Link dates to facts, and locations to the facts that mention them.
    for _, row in df[['date_start', 'locations']].iterrows():
        for fact in row['locations']:
            place, statement = fact[0], fact[1]
            G.add_node(place, group="Location", color="green")
            G.add_node(statement, group="Fact", color="red")
            G.add_edge(row['date_start'], statement)
            G.add_edge(place, statement)

    # Link dates to the activities mentioned on them.
    for _, row in df[['date_start', 'activities']].iterrows():
        for fact in row['activities']:
            G.add_node(fact[0], group="Activity", color="purple")
            G.add_edge(row['date_start'], fact[0])

    # Link each entry date to its dominant sentiment label.
    for _, row in df[['date_start', 'sent_index']].iterrows():
        # Resolve the label once per row instead of once per graph call.
        label = sp.get_most_sentiment([row['sent_index']])
        G.add_node(label, group="Sentiment", color="pink")
        G.add_edge(row['date_start'], label)
    return G


def dates_of_Diary_writing(G):
    """Return the first and last diary date as a ``(start, stop)`` tuple.

    A node counts as a date when its ``group`` attribute is ``'Date'``.
    The start is a date with no other date pointing at it; the stop is a
    date that points at no other date. When several nodes qualify, the one
    visited last wins. Either element is ``''`` when no node qualifies.

    Args:
        G: directed graph exposing ``nodes``, ``predecessors`` and
            ``successors`` (networkx ``DiGraph`` interface).

    Returns:
        tuple: ``(start, stop)``.
    """
    node_attrs = G.nodes

    def links_to_other_date(date, neighbours):
        # A self-loop (two consecutive entries sharing one date) carries no
        # ordering information, so it is ignored. `.get` tolerates
        # neighbours that were added without a group attribute.
        return any(
            other != date and node_attrs[other].get('group') == 'Date'
            for other in neighbours
        )

    start = ''
    stop = ''
    for date, attrs in node_attrs.data():
        if attrs.get('group') != 'Date':
            continue
        # Two independent checks: a diary with a single entry is both the
        # first and the last date.
        if not links_to_other_date(date, G.predecessors(date)):
            start = date
        if not links_to_other_date(date, G.successors(date)):
            stop = date
    return (start, stop)


def most_visited_places(G):
    """Return up to three location names with the most outgoing edges.

    Every node whose ``group`` attribute is ``'Location'`` is scored by
    ``len(G[node])``, i.e. the number of nodes it points at.

    Args:
        G: graph exposing ``nodes.data()`` and item access by node.

    Returns:
        list: at most three node keys, highest score first.
    """
    outgoing = Counter({
        name: len(G[name])
        for name, attrs in G.nodes.data()
        if attrs.get('group') == 'Location'
    })
    return [name for name, _ in outgoing.most_common(3)]

def most_activity(G):
    """Return the most frequently mentioned activity as a 0- or 1-item list.

    Every node whose ``group`` attribute is ``'Activity'`` is scored by the
    number of nodes pointing at it. In this module the graph only contains
    date -> activity edges, so that is the number of dates mentioning the
    activity. Scoring by outgoing edges gave every activity a score of 0
    and always returned the first one inserted.

    Args:
        G: directed graph exposing ``nodes.data()`` and ``predecessors``.

    Returns:
        list: the top-scoring activity key, or ``[]`` when the graph has
        no activity nodes.
    """
    mentions = Counter()
    for name, attrs in G.nodes.data():
        if attrs.get('group') != 'Activity':
            continue
        mentions[name] = sum(1 for _ in G.predecessors(name))
    return [name for name, _ in mentions.most_common(1)]


def facts_for_annotation(G, gender, most_places):
    """Collect ``(date, fact)`` pairs that should appear in the annotation.

    Facts are the successors of every node whose ``group`` attribute is
    ``'Location'``. A fact is kept when
    ``wt.get_fact_to_annotation(fact, gender, most_places)`` is truthy, and
    is paired with its first predecessor whose group is ``'Date'``.

    Args:
        G: directed graph exposing ``nodes``, ``successors`` and
            ``predecessors``.
        gender: passed through to ``wt.get_fact_to_annotation``.
        most_places: passed through to ``wt.get_fact_to_annotation``.

    Returns:
        list: ``(date, fact)`` tuples in discovery order, each fact at most
        once.
    """
    node_attrs = G.nodes
    seen = set()
    selected = []
    for node, attrs in node_attrs.data():
        if attrs.get('group') != 'Location':
            continue
        for fact in G.successors(node):
            # A fact linked to several locations would otherwise be
            # reported once per location.
            if fact in seen:
                continue
            seen.add(fact)
            if not wt.get_fact_to_annotation(fact, gender, most_places):
                continue
            date = next(
                (
                    origin
                    for origin in G.predecessors(fact)
                    if node_attrs[origin].get('group') == 'Date'
                ),
                None,
            )
            # Skip a fact with no date instead of raising IndexError.
            if date is None:
                continue
            selected.append((date, fact))
    return selected

def sentiment_of_date(G):
    """Group the nodes pointing at each sentiment label.

    Args:
        G: directed graph supporting ``in`` and ``predecessors``.

    Returns:
        dict: keys ``'positive'``, ``'negative'`` and ``'neutral'``, each
        mapped to the list of predecessors of that label node. A label
        that is not a node of the graph maps to an empty list.
    """
    # Asking for predecessors of a node that is not in the graph raises, so
    # a diary lacking one of the labels must be guarded explicitly.
    return {
        label: list(G.predecessors(label)) if label in G else []
        for label in ('positive', 'negative', 'neutral')
    }


def constuct_fact_for_annotation(facts, sentiment, gender, locations):
    """Compose the fact paragraphs of the annotation text.

    Facts dated on a positive entry go into the first paragraph. Facts
    dated on a negative or a neutral entry go into the second paragraph,
    negative dates first. Each fact is rendered as
    ``"<transformed fact, lower-cased> (<date>)"``.

    Args:
        facts: iterable of ``(date, fact)`` pairs.
        sentiment: mapping with the keys ``'positive'``, ``'negative'`` and
            ``'neutral'``, each holding a collection of dates.
        gender: passed through to the ``wt`` helpers.
        locations: passed through to ``wt.transform_fact``.

    Returns:
        str: the composed text, ``''`` when no fact matches any date.
    """
    positive_intro = f'В записях с преимущественно положительной тональностью {wt.get_noun(gender)} {wt.gender_transformer("писал", gender)} как {wt.get_pronoun(gender)}'
    other_intro = f'Также в дневнике описывается, как {wt.get_pronoun(gender)}'

    # Index facts by date once rather than rescanning the list per date.
    facts_by_date = {}
    for entry in facts:
        facts_by_date.setdefault(entry[0], []).append(entry[1])

    def render(label):
        # Dates in sentiment order; facts in their original order per date.
        rendered = []
        for date in sentiment.get(label) or ():
            for fact in facts_by_date.get(date, ()):
                rendered.append(
                    f"{wt.transform_fact(locations, fact, gender).lower()} ({date})"
                )
        return rendered

    positive_facts = render('positive')
    # Negative and neutral entries share the second paragraph.
    other_facts = render('negative') + render('neutral')

    text = ''
    if positive_facts:
        text += f'{positive_intro} {", ".join(positive_facts)}.'
    if other_facts:
        text += f'\n\n{other_intro} {", ".join(other_facts)}.'
    return text

def annotation(G, gender, locations):
    """Compose the complete Russian-language annotation for a diary graph.

    The text has three parts: an introduction with the first and last
    diary date, a sentence listing the most visited places together with an
    optional sentence about the top activity, and the fact paragraphs
    produced by ``constuct_fact_for_annotation``.

    Args:
        G: diary graph as consumed by the other helpers of this module.
        gender: passed through to the ``wt`` helpers.
        locations: passed through to ``constuct_fact_for_annotation``.

    Returns:
        str: the annotation text.
    """
    dates = dates_of_Diary_writing(G)
    most_places = most_visited_places(G)
    facts = facts_for_annotation(G, gender, most_places)
    sentiment = sentiment_of_date(G)
    activities = most_activity(G)

    if activities:
        act = f'В дневнике также упоминается игра в {activities[0]}.'
    else:
        act = ''

    intro = f'{wt.get_noun(gender).title()} этого дневника {wt.gender_transformer("вести", gender)} его с {dates[0]} по {dates[1]}.'

    # Build the place list from however many places exist. Indexing three
    # fixed positions raised IndexError for diaries with fewer locations.
    inflected = [wt.inflector(place, "accs") for place in most_places]
    if len(inflected) > 1:
        listed = f'{", ".join(inflected[:-1])} и {inflected[-1]}'
    else:
        listed = ''.join(inflected)

    if listed:
        places = f' Наиболее часто {wt.get_pronoun(gender)} {wt.gender_transformer("описывал", gender)} {listed}.'
    else:
        # No location at all: leave the sentence out.
        places = ''

    fact_text = constuct_fact_for_annotation(facts, sentiment, gender, locations)
    return f'{intro}{places} {act}\n\n{fact_text}'