Spaces:
Runtime error
Runtime error
File size: 5,840 Bytes
7d61140 eea4f3c a8dc9d8 7d61140 eea4f3c 7d61140 eea4f3c 94c4f42 7d61140 94c4f42 a8dc9d8 eea4f3c 94c4f42 eea4f3c 94c4f42 eea4f3c 94c4f42 eea4f3c a8dc9d8 eea4f3c a8dc9d8 eea4f3c a8dc9d8 94c4f42 eea4f3c a8dc9d8 94c4f42 eea4f3c 94c4f42 eea4f3c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
import networkx as nx
from collections import Counter
import word_transformations as wt
import sentiment_parser as sp
# import app
def build_graph(df):
    """Build a directed graph linking diary dates, locations, facts, activities and sentiment.

    Nodes carry a ``group`` attribute ("Date", "Location", "Fact",
    "Activity" or "Sentiment") and a ``color`` attribute.

    Args:
        df: Table indexable by the column names ``date_start``,
            ``locations``, ``activities`` and ``sent_index``. Each
            ``locations`` item is indexed as ``(location, fact)``; each
            ``activities`` item is indexed at ``[0]`` for the activity name.

    Returns:
        The populated ``nx.DiGraph``.
    """
    G = nx.DiGraph()

    # Materialise the dates once so every loop uses exactly the same node keys.
    dates = list(df['date_start'])

    # Register every date explicitly: relying on the pairwise loop alone would
    # leave a single-entry diary without any node tagged as "Date".
    for date in dates:
        G.add_node(date, group="Date", color="blue")

    # Link consecutive dates.
    for previous, current in zip(dates, dates[1:]):
        G.add_edge(previous, current)

    # Link dates to facts, and locations to the facts that mention them.
    for date, located_facts in zip(dates, df['locations']):
        for item in located_facts:
            location, fact = item[0], item[1]
            G.add_node(location, group="Location", color="green")
            G.add_node(fact, group="Fact", color="red")
            G.add_edge(date, fact)
            G.add_edge(location, fact)

    # Link dates to the activities mentioned on them.
    for date, activities in zip(dates, df['activities']):
        for item in activities:
            activity = item[0]
            G.add_node(activity, group="Activity", color="purple")
            G.add_edge(date, activity)

    # Link each entry date to its sentiment label.
    for date, sent_index in zip(dates, df['sent_index']):
        # Compute the label once instead of once per use.
        label = sp.get_most_sentiment([sent_index])
        G.add_node(label, group="Sentiment", color="pink")
        G.add_edge(date, label)

    return G
def dates_of_Diary_writing(G):
    """Return the first and last diary dates found in the graph.

    The start is a "Date" node with no other "Date" node pointing to it; the
    stop is a "Date" node that points to no other "Date" node.

    Args:
        G: Directed graph whose node attribute dicts may carry a ``group`` key.

    Returns:
        A ``(start, stop)`` tuple. Either element is ``''`` when no matching
        node exists. For a diary with a single date both elements are that date.
    """
    def is_date(node):
        return G.nodes[node].get('group') == 'Date'

    start = ''
    stop = ''
    for date, attrs in G.nodes.data():
        if attrs.get('group') != 'Date':
            continue
        # Self-loops (the same date on consecutive entries) are ignored so
        # they cannot hide the real first/last date.
        has_earlier = any(p != date and is_date(p) for p in G.predecessors(date))
        has_later = any(s != date and is_date(s) for s in G.successors(date))
        if not has_earlier:
            start = date
        # Independent check (not ``elif``): a lone date is both start and stop.
        if not has_later:
            stop = date
    return (start, stop)
def most_visited_places(G):
    """Return up to three "Location" nodes with the most outgoing edges.

    Args:
        G: Directed graph whose node attribute dicts may carry a ``group`` key.

    Returns:
        List of at most three location node keys, most connected first.
    """
    fact_counts = Counter({
        node: len(G[node])
        for node, attrs in G.nodes.data()
        if attrs.get('group') == 'Location'
    })
    return [place for place, _ in fact_counts.most_common(3)]
def most_activity(G):
    """Return the most frequently mentioned "Activity" node, if any.

    Args:
        G: Directed graph whose node attribute dicts may carry a ``group`` key.

    Returns:
        A list holding the single top activity node key, or an empty list
        when the graph has no "Activity" nodes.
    """
    # Count incoming edges: ``len(G[node])`` counts successors only, which is
    # zero for a node that is only ever the target of edges, making every
    # such activity tie.
    mention_counts = Counter({
        node: G.in_degree(node)
        for node, attrs in G.nodes.data()
        if attrs.get('group') == 'Activity'
    })
    return [activity for activity, _ in mention_counts.most_common(1)]
def facts_for_annotation(G, gender, most_places):
    """Collect the facts selected for the annotation together with their dates.

    Candidate facts are the successors of every "Location" node. A fact is
    kept when ``wt.get_fact_to_annotation(fact, gender, most_places)`` is
    truthy and it has at least one "Date" predecessor.

    Args:
        G: Directed graph whose node attribute dicts may carry a ``group`` key.
        gender: Passed through to ``wt.get_fact_to_annotation``.
        most_places: Passed through to ``wt.get_fact_to_annotation``.

    Returns:
        List of ``(date, fact)`` tuples, one per distinct fact, in first-seen
        order. The date is the first "Date" predecessor of the fact.
    """
    location_nodes = [
        node for node, attrs in G.nodes.data()
        if attrs.get('group') == 'Location'
    ]
    # A fact reachable from several locations must appear only once;
    # dict.fromkeys de-duplicates while preserving first-seen order.
    candidate_facts = dict.fromkeys(
        fact for location in location_nodes for fact in G.successors(location)
    )

    selected = []
    for fact in candidate_facts:
        if not wt.get_fact_to_annotation(fact, gender, most_places):
            continue
        date = next(
            (p for p in G.predecessors(fact) if G.nodes[p].get('group') == 'Date'),
            None,
        )
        if date is None:
            # No dated entry points at this fact; skip rather than crash.
            continue
        selected.append((date, fact))
    return selected
def sentiment_of_date(G):
    """Group the predecessors of each sentiment node by sentiment label.

    Args:
        G: Directed graph that may contain the nodes ``'positive'``,
            ``'negative'`` and ``'neutral'``.

    Returns:
        Dict with the keys ``'positive'``, ``'negative'`` and ``'neutral'``,
        each mapping to the list of predecessor nodes of that label. A label
        that is absent from the graph maps to an empty list.
    """
    # ``predecessors`` raises for a node that is not in the graph, which
    # happens whenever a sentiment never occurs, so check membership first.
    return {
        label: list(G.predecessors(label)) if label in G else []
        for label in ('positive', 'negative', 'neutral')
    }
def constuct_fact_for_annotation(facts, sentiment, gender, locations):
    """Build the annotation text from the selected facts.

    Facts dated on "positive" entries form the first paragraph; facts dated on
    "negative" or "neutral" entries share the second paragraph.

    Args:
        facts: Iterable of ``(date, fact)`` pairs.
        sentiment: Mapping from ``'positive'`` / ``'negative'`` / ``'neutral'``
            to lists of dates; a missing or empty key contributes nothing.
        gender: Passed through to the ``wt`` text helpers.
        locations: Passed through to ``wt.transform_fact``.

    Returns:
        The paragraphs joined by a blank line, or ``''`` when no fact matches
        any sentiment date.
    """
    def facts_on(label):
        # Keep the date-major, fact-minor ordering of the sentiment lists.
        return [
            fact
            for date in (sentiment.get(label) or ())
            for fact in facts
            if date == fact[0]
        ]

    def render(selected):
        return ", ".join(
            f"{wt.transform_fact(locations, fact[1], gender).lower()} ({fact[0]})"
            for fact in selected
        )

    positive_facts = facts_on('positive')
    other_facts = facts_on('negative') + facts_on('neutral')

    # Prompts are built only for paragraphs that are actually emitted.
    paragraphs = []
    if positive_facts:
        prompt = f'В записях с преимущественно положительной тональностью {wt.get_noun(gender)} {wt.gender_transformer("писал", gender)} как {wt.get_pronoun(gender)}'
        paragraphs.append(f'{prompt} {render(positive_facts)}.')
    if other_facts:
        prompt = f'Также в дневнике описывается, как {wt.get_pronoun(gender)}'
        paragraphs.append(f'{prompt} {render(other_facts)}.')

    # Joining avoids leading blank lines when there is no positive paragraph.
    return '\n\n'.join(paragraphs)
def annotation(G, gender, locations):
    """Compose the full diary annotation text from the graph.

    The text states the writing period, the most visited places (as many as
    are available, up to three), an optional sentence about the top activity,
    and the fact paragraphs from ``constuct_fact_for_annotation``.

    Args:
        G: Graph passed to the helper functions of this module.
        gender: Passed through to the ``wt`` text helpers.
        locations: Passed through to ``constuct_fact_for_annotation``.

    Returns:
        The annotation string.
    """
    dates = dates_of_Diary_writing(G)
    most_places = most_visited_places(G)
    facts = facts_for_annotation(G, gender, most_places)
    sentiment = sentiment_of_date(G)
    activities = most_activity(G)

    act = f'В дневнике также упоминается игра в {activities[0]}.' if activities else ''

    # Build the places sentence from however many places exist, so a diary
    # with fewer than three locations does not raise IndexError.
    places = [wt.inflector(place, "accs") for place in most_places]
    if len(places) > 1:
        places_phrase = f'{", ".join(places[:-1])} и {places[-1]}'
    else:
        places_phrase = ''.join(places)
    if places:
        places_sentence = f' Наиболее часто {wt.get_pronoun(gender)} {wt.gender_transformer("описывал", gender)} {places_phrase}.'
    else:
        places_sentence = ''

    intro = f'{wt.get_noun(gender).title()} этого дневника {wt.gender_transformer("вести", gender)} его с {dates[0]} по {dates[1]}.'
    facts_text = constuct_fact_for_annotation(facts, sentiment, gender, locations)
    return f'{intro}{places_sentence} {act}\n\n{facts_text}'