Spaces:
Sleeping
Sleeping
import networkx as nx | |
from collections import Counter | |
import word_transformations as wt | |
import sentiment_parser as sp | |
# import app | |
def build_graph(df):
    """Build a directed graph linking diary dates to what happened on them.

    Every node carries a ``group`` and a ``color`` attribute:

    * ``Date`` (blue): each value of ``df['date_start']``; consecutive rows
      are chained with an edge ``previous -> current``.
    * ``Location`` (green): element 0 of every item in ``row['locations']``.
    * ``Fact`` (red): element 1 of every item in ``row['locations']``; it
      receives an edge from its date and from its location.
    * ``Activity`` (purple): element 0 of every item in ``row['activities']``;
      it receives an edge from its date.
    * ``Sentiment`` (pink): the value returned by
      ``sp.get_most_sentiment([row['sent_index']])``; it receives an edge
      from its date.

    Args:
        df: table with the columns ``date_start``, ``locations``,
            ``activities`` and ``sent_index``. ``locations`` and
            ``activities`` must hold iterables of indexable items.

    Returns:
        The populated ``networkx.DiGraph``.
    """
    G = nx.DiGraph()

    # Tag every date up front: the pairwise loop alone never runs for a
    # one-row frame, which would leave its date without the "Date" group.
    dates = list(df['date_start'])
    for date in dates:
        G.add_node(date, group="Date", color="blue")
    for previous, current in zip(dates, dates[1:]):
        G.add_edge(previous, current)

    # Date -> fact and location -> fact links.
    for date, located_facts in zip(dates, df['locations']):
        for fact in located_facts:
            G.add_node(fact[0], group="Location", color="green")
            G.add_node(fact[1], group="Fact", color="red")
            G.add_edge(date, fact[1])
            G.add_edge(fact[0], fact[1])

    # Date -> activity links (activities have no outgoing edges).
    for date, activities in zip(dates, df['activities']):
        for fact in activities:
            G.add_node(fact[0], group="Activity", color="purple")
            G.add_edge(date, fact[0])

    # Date -> sentiment links; the label is computed once per row so the
    # node and the edge are guaranteed to use the same key.
    for date, sent_index in zip(dates, df['sent_index']):
        sentiment = sp.get_most_sentiment([sent_index])
        G.add_node(sentiment, group="Sentiment", color="pink")
        G.add_edge(date, sentiment)

    return G
def dates_of_Diary_writing(G):
    """Return the first and the last diary date as a ``(start, stop)`` tuple.

    Only nodes whose ``group`` attribute is ``'Date'`` are considered.
    ``start`` is a date with no predecessor other than itself; ``stop`` is a
    date with no ``'Date'`` successor other than itself. A diary with a
    single date therefore yields that date for both positions. Either value
    stays ``''`` when no matching node exists.

    Args:
        G: directed graph exposing ``nodes``, ``predecessors`` and
            ``successors`` (a ``networkx.DiGraph``).

    Returns:
        Tuple ``(start, stop)``.
    """
    start = ''
    stop = ''
    for node, attrs in G.nodes.data():
        if attrs.get('group') != 'Date':
            continue
        # A self-loop (two consecutive entries on the same date) must not
        # count as an earlier or a later date.
        has_earlier = any(prev != node for prev in G.predecessors(node))
        has_later_date = any(
            nxt != node and G.nodes[nxt].get('group') == 'Date'
            for nxt in G.successors(node)
        )
        if not has_earlier:
            start = node
        # Independent check (not ``elif``) so a lone date is also the end.
        if not has_later_date:
            stop = node
    return (start, stop)
def most_visited_places(G, top_n=3):
    """Return the locations with the most outgoing edges, most frequent first.

    A location's score is ``len(G[node])``, i.e. the number of nodes it
    points to in the directed graph.

    Args:
        G: directed graph whose nodes carry a ``group`` attribute.
        top_n: maximum number of locations to return (default ``3``).

    Returns:
        List of at most ``top_n`` location node keys; locations with equal
        scores keep the order in which they appear in the graph.
    """
    counts = Counter()
    for node, attrs in G.nodes.data():
        if attrs.get('group') == 'Location':
            counts[node] = len(G[node])
    return [place for place, _ in counts.most_common(top_n)]
def most_activity(G):
    """Return the most frequently mentioned activity as a one-element list.

    An activity's score is the number of its predecessors. ``G[node]`` would
    count successors in a directed graph, and an activity that only receives
    edges has none, so incoming edges are what must be counted.

    Args:
        G: directed graph whose nodes carry a ``group`` attribute.

    Returns:
        ``[activity]`` for the highest-scoring ``'Activity'`` node (first
        seen wins on ties), or ``[]`` when the graph has no activity.
    """
    counts = Counter()
    for node, attrs in G.nodes.data():
        if attrs.get('group') == 'Activity':
            counts[node] = sum(1 for _ in G.predecessors(node))
    return [activity for activity, _ in counts.most_common(1)]
def facts_for_annotation(G, gender, most_places):
    """Collect the facts selected for the annotation with their dates.

    Candidate facts are the successors of every ``'Location'`` node. A fact
    is kept when ``wt.get_fact_to_annotation(fact, gender, most_places)`` is
    truthy, and is paired with its first predecessor whose group is
    ``'Date'``.

    Args:
        G: directed graph whose nodes carry a ``group`` attribute.
        gender: forwarded unchanged to ``wt.get_fact_to_annotation``.
        most_places: forwarded unchanged to ``wt.get_fact_to_annotation``.

    Returns:
        List of ``(date, fact)`` tuples in first-seen order. Each fact
        appears once even when several locations point to it; a fact with
        no date predecessor is skipped.
    """
    locations = [
        node for node, attrs in G.nodes.data()
        if attrs.get('group') == 'Location'
    ]
    # dict.fromkeys drops repeats (one fact reachable from two locations)
    # while preserving the order in which facts were first encountered.
    facts = dict.fromkeys(
        fact for location in locations for fact in G.successors(location)
    )

    res = []
    for fact in facts:
        if not wt.get_fact_to_annotation(fact, gender, most_places):
            continue
        date = next(
            (
                prev for prev in G.predecessors(fact)
                if G.nodes[prev].get('group') == 'Date'
            ),
            None,
        )
        if date is None:
            # No dated predecessor: nothing to attribute the fact to.
            continue
        res.append((date, fact))
    return res
def sentiment_of_date(G):
    """Group the predecessors of each sentiment node by sentiment label.

    Args:
        G: directed graph supporting ``in`` and ``predecessors``.

    Returns:
        Dict with the keys ``'positive'``, ``'negative'`` and ``'neutral'``
        (in that order), each mapped to the list of that node's
        predecessors. A label that is not a node of the graph maps to an
        empty list instead of raising.
    """
    return {
        # Asking networkx for the predecessors of a missing node raises, so
        # membership is checked first.
        label: list(G.predecessors(label)) if label in G else []
        for label in ('positive', 'negative', 'neutral')
    }
def _render_facts(dates, facts_by_date, gender, locations):
    """Format the facts recorded on ``dates`` as ``"<text> (<date>)"`` strings."""
    rendered = []
    for date in dates:
        for fact in facts_by_date.get(date, ()):
            rendered.append(
                f"{wt.transform_fact(locations, fact[1], gender).lower()} ({fact[0]})"
            )
    return rendered


def constuct_fact_for_annotation(facts, sentiment, gender, locations):
    """Build the annotation text from the selected facts.

    Facts dated on a ``'positive'`` day go into the first paragraph; facts
    dated on a ``'negative'`` or ``'neutral'`` day (in that order) go into
    the second one. Within a paragraph, facts follow the order of the dates
    in ``sentiment`` and then their order in ``facts``.

    Args:
        facts: iterable of ``(date, fact)`` pairs.
        sentiment: dict with the keys ``'positive'``, ``'negative'`` and
            ``'neutral'``, each holding a list of dates.
        gender: forwarded to the ``wt`` helpers.
        locations: forwarded to ``wt.transform_fact``.

    Returns:
        The non-empty paragraphs joined by a blank line, or ``''`` when no
        fact matches any date.
    """
    prompts = [f'В записях с преимущественно положительной тональностью {wt.get_noun(gender)} {wt.gender_transformer("писал", gender)} как {wt.get_pronoun(gender)}',
               f'Также в дневнике описывается, как {wt.get_pronoun(gender)}']

    # Index the facts by date once instead of rescanning the whole list for
    # every date of every sentiment.
    facts_by_date = {}
    for fact in facts:
        facts_by_date.setdefault(fact[0], []).append(fact)

    positive_facts = _render_facts(
        sentiment['positive'], facts_by_date, gender, locations
    )
    other_facts = _render_facts(
        [*sentiment['negative'], *sentiment['neutral']],
        facts_by_date, gender, locations,
    )

    paragraphs = []
    if positive_facts:
        paragraphs.append(f'{prompts[0]} {", ".join(positive_facts)}.')
    if other_facts:
        paragraphs.append(f'{prompts[1]} {", ".join(other_facts)}.')
    # Joining avoids a leading blank line when only the second paragraph
    # is present.
    return '\n\n'.join(paragraphs)
def annotation(G, gender, locations):
    """Compose the full text annotation of a diary graph.

    The text states the period covered by the diary, the most frequently
    described places (up to three), the most frequent activity if there is
    one, and then the fact paragraphs produced by
    ``constuct_fact_for_annotation``.

    Args:
        G: diary graph as produced by ``build_graph``.
        gender: forwarded to the ``wt`` helpers to pick word forms.
        locations: forwarded to ``constuct_fact_for_annotation``.

    Returns:
        The annotation as a single string.
    """
    start, stop = dates_of_Diary_writing(G)
    most_places = most_visited_places(G)
    facts = facts_for_annotation(G, gender, most_places)
    sentiment = sentiment_of_date(G)
    activities = most_activity(G)

    if activities:
        act = f'В дневнике также упоминается игра в {activities[0]}.'
    else:
        act = ''

    intro = f'{wt.get_noun(gender).title()} этого дневника {wt.gender_transformer("вести", gender)} его с {start} по {stop}.'

    # Fewer than three locations may exist, so the place list is assembled
    # from whatever is available instead of indexing [0], [1], [2].
    places = [wt.inflector(place, "accs") for place in most_places[:3]]
    if places:
        if len(places) > 1:
            place_list = f'{", ".join(places[:-1])} и {places[-1]}'
        else:
            place_list = places[0]
        intro += f' Наиболее часто {wt.get_pronoun(gender)} {wt.gender_transformer("описывал", gender)} {place_list}.'

    return f'{intro} {act}\n\n{constuct_fact_for_annotation(facts, sentiment, gender, locations)}'