# annotator_demo / network_builder.py
# senyukhin's picture
# Upload 17 files
# 94c4f42
import networkx as nx
from collections import Counter
import word_transformations as wt
import sentiment_parser as sp
# import app
def build_graph(df):
    """Build a directed graph linking diary dates, locations, facts, activities and sentiment.

    Reads the columns ``date_start``, ``locations``, ``activities`` and
    ``sent_index`` of ``df``. Each item of a ``locations`` cell is indexed as
    ``item[0]`` (location) and ``item[1]`` (fact); each item of an
    ``activities`` cell is indexed as ``item[0]`` (activity).

    Edges created:
      * date -> next date (in row order),
      * date -> fact and location -> fact,
      * date -> activity,
      * date -> sentiment label returned by ``sp.get_most_sentiment``.

    Returns:
        The populated ``nx.DiGraph``; nodes carry ``group`` and ``color`` attributes.
    """
    G = nx.DiGraph()

    # Chain the dates. Every date is tagged as a Date node up front so that a
    # frame with a single row still yields a properly tagged Date node (the
    # pairwise zip below produces nothing in that case).
    dates = list(df['date_start'])
    for date in dates:
        G.add_node(date, group="Date", color="blue")
    for previous, current in zip(dates, dates[1:]):
        # Two consecutive rows with the same date would otherwise create a
        # self-loop, which hides the first/last date from predecessor/successor checks.
        if previous != current:
            G.add_edge(previous, current)

    # Link dates to facts, and locations to the facts mentioning them.
    for date, located_facts in zip(dates, df['locations']):
        for fact in located_facts:
            G.add_node(fact[0], group="Location", color="green")
            G.add_node(fact[1], group="Fact", color="red")
            G.add_edge(date, fact[1])
            G.add_edge(fact[0], fact[1])

    # Link dates to the activities mentioned on them.
    for date, activities in zip(dates, df['activities']):
        for fact in activities:
            G.add_node(fact[0], group="Activity", color="purple")
            G.add_edge(date, fact[0])

    # Link each entry date to its sentiment label.
    for date, sent_index in zip(dates, df['sent_index']):
        # Resolve the label once instead of calling the parser twice per row.
        label = sp.get_most_sentiment([sent_index])
        G.add_node(label, group="Sentiment", color="pink")
        G.add_edge(date, label)

    return G
def dates_of_Diary_writing(G):
    """Return the first and last diary dates as a ``(start, stop)`` tuple.

    A Date node is the start when no other Date node points to it, and the
    stop when it points to no other Date node. Either element stays ``''``
    when no matching node is found (e.g. the graph has no Date nodes).
    """
    def _is_other_date(node, current):
        # Self-loops are ignored so a repeated date cannot hide an endpoint;
        # .get() tolerates neighbours that were added without a group.
        return node != current and G.nodes[node].get('group') == 'Date'

    dates = [node for node, attrs in G.nodes.data() if attrs.get('group') == 'Date']
    start = ''
    stop = ''
    for d in dates:
        if not any(_is_other_date(p, d) for p in G.predecessors(d)):
            start = d
        # Independent check (not elif): a diary with a single date is both
        # the start and the stop.
        if not any(_is_other_date(s, d) for s in G.successors(d)):
            stop = d
    return (start, stop)
def most_visited_places(G):
    """Return up to three Location nodes, ordered by how many successors each has."""
    fact_counts = Counter({
        node: len(G[node])
        for node, attrs in G.nodes.data()
        if attrs.get('group') == 'Location'
    })
    return [place for place, _ in fact_counts.most_common(3)]
def most_activity(G):
    """Return a list holding the most frequently mentioned Activity node.

    The list is empty when the graph has no Activity nodes.

    Activity nodes only receive edges (date -> activity in ``build_graph``)
    and have no outgoing ones, so frequency is measured by the number of
    incoming edges. Counting successors would give 0 for every activity.
    """
    mentions = Counter({
        node: G.in_degree(node)
        for node, attrs in G.nodes.data()
        if attrs.get('group') == 'Activity'
    })
    return [activity for activity, _ in mentions.most_common(1)]
def facts_for_annotation(G, gender, most_places):
    """Collect ``(date, fact)`` pairs for the facts selected for the annotation.

    Candidate facts are the successors of every Location node. A fact is kept
    when ``wt.get_fact_to_annotation(fact, gender, most_places)`` is truthy;
    it is paired with its first predecessor tagged as a Date.

    Returns:
        A list of ``(date, fact)`` tuples in first-seen order.
    """
    locations = [
        node for node, attrs in G.nodes.data()
        if attrs.get('group') == 'Location'
    ]
    # A fact linked to several locations would be listed once per location;
    # dict.fromkeys drops the repeats while keeping first-seen order.
    facts = dict.fromkeys(
        fact for place in locations for fact in G.successors(place)
    )

    selected = []
    for fact in facts:
        if not wt.get_fact_to_annotation(fact, gender, most_places):
            continue
        date = next(
            (node for node in G.predecessors(fact)
             if G.nodes[node].get('group') == 'Date'),
            None,
        )
        if date is None:
            # No dated entry points at this fact, so it cannot be placed in
            # the annotation; skip it instead of raising IndexError.
            continue
        selected.append((date, fact))
    return selected
def sentiment_of_date(G):
    """Map each sentiment label to the list of nodes pointing at it.

    The result always has the keys ``'positive'``, ``'negative'`` and
    ``'neutral'``. A label that has no node in the graph maps to an empty
    list; asking networkx for the predecessors of a missing node raises.
    """
    return {
        label: list(G.predecessors(label)) if label in G else []
        for label in ('positive', 'negative', 'neutral')
    }
def constuct_fact_for_annotation(facts, sentiment, gender, locations):
    """Build the annotation text from the selected facts.

    Args:
        facts: sequence of items indexed as ``item[0]`` (date) and ``item[1]`` (fact).
        sentiment: mapping with the keys ``'positive'``, ``'negative'`` and
            ``'neutral'``, each holding the dates with that sentiment.
        gender: passed through to the ``wt`` helpers.
        locations: passed through to ``wt.transform_fact``.

    Returns:
        A string with up to two paragraphs: facts from positive dates, then
        facts from negative and neutral dates together under the second
        prompt. Returns ``''`` when no fact matches any date.
    """
    # Index the facts by date once instead of rescanning the whole list for
    # every date; the per-date fact order is unchanged.
    facts_by_date = {}
    for fact in facts:
        facts_by_date.setdefault(fact[0], []).append(fact)

    def _describe(labels):
        # Render "<transformed fact> (<date>)" for every fact dated under the given labels.
        described = []
        for label in labels:
            for date in sentiment.get(label) or ():
                for fact in facts_by_date.get(date, ()):
                    described.append(
                        f"{wt.transform_fact(locations, fact[1], gender).lower()} ({fact[0]})"
                    )
        return described

    positive_facts = _describe(('positive',))
    # Negative and neutral entries share the second, tone-agnostic prompt.
    other_facts = _describe(('negative', 'neutral'))

    # The prompts are only built when they are actually used.
    text = ''
    if positive_facts:
        positive_prompt = f'В записях с преимущественно положительной тональностью {wt.get_noun(gender)} {wt.gender_transformer("писал", gender)} как {wt.get_pronoun(gender)}'
        text += f'{positive_prompt} {", ".join(positive_facts)}.'
    if other_facts:
        other_prompt = f'Также в дневнике описывается, как {wt.get_pronoun(gender)}'
        text += f'\n\n{other_prompt} {", ".join(other_facts)}.'
    return text
def annotation(G, gender, locations):
    """Compose the full diary annotation text from the graph.

    Combines the writing period, the most described places (up to three),
    an optional sentence about the top activity, and the fact paragraphs
    produced by ``constuct_fact_for_annotation``.

    Returns:
        The annotation as a single string.
    """
    dates = dates_of_Diary_writing(G)
    most_places = most_visited_places(G)
    facts = facts_for_annotation(G, gender, most_places)
    sentiment = sentiment_of_date(G)
    activities = most_activity(G)

    act = f'В дневнике также упоминаниется игра в {activities[0]}.' if activities else ''

    intro = f'{wt.get_noun(gender).title()} этого дневника {wt.gender_transformer("вести", gender)} его с {dates[0]} по {dates[1]}.'

    # The graph may hold fewer than three locations; list whatever is
    # available instead of indexing three fixed positions.
    places = [wt.inflector(place, "accs") for place in most_places[:3]]
    if len(places) > 1:
        listed = f'{", ".join(places[:-1])} и {places[-1]}'
    else:
        listed = ''.join(places)
    if listed:
        places_sentence = f' Наиболее часто {wt.get_pronoun(gender)} {wt.gender_transformer("описывал", gender)} {listed}.'
    else:
        places_sentence = ''

    return f'{intro}{places_sentence} {act}\n\n{constuct_fact_for_annotation(facts, sentiment, gender, locations)}'