Spaces:

senyukhin
/

annotator_demo

Runtime error

File size: 5,840 Bytes

7d61140
eea4f3c
 
a8dc9d8
 
7d61140
 
 
eea4f3c
7d61140
 
 
 
 
 
 
 
eea4f3c
 
94c4f42
7d61140
 
 
 
94c4f42
 
 
 
 
 
 
a8dc9d8
 
 
 
eea4f3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94c4f42
eea4f3c
 
 
94c4f42
 
 
 
 
 
 
eea4f3c
 
 
 
 
 
 
94c4f42
eea4f3c
 
 
 
 
 
 
 
 
 
a8dc9d8
 
 
 
 
 
eea4f3c
a8dc9d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eea4f3c
 
 
a8dc9d8
94c4f42
eea4f3c
a8dc9d8
 
94c4f42
 
 
 
eea4f3c
94c4f42
eea4f3c

import networkx as nx
from collections import Counter
import word_transformations as wt
import sentiment_parser as sp
# import app


def build_graph(df):
    G = nx.DiGraph()

    # Связывание дат 
    for previous, current in zip(df['date_start'], df['date_start'][1:]):
        G.add_node(previous, group="Date", color = "blue")
        G.add_node(current, group="Date", color = "blue")
        G.add_edge(previous, current)

    # Добавление связей дат и фактов, фактов и слов
    for index, row in df[['date_start', 'locations']].iterrows():
        for fact in row['locations']:
            G.add_node(fact[0], group="Location", color = "green")
            G.add_node(fact[1], group="Fact", color = "red")
            G.add_edge(row['date_start'], fact[1])
            G.add_edge(fact[0], fact[1])

    for index, row in df[['date_start', 'activities']].iterrows():
        for fact in row['activities']:
            G.add_node(fact[0], group="Activity", color = "purple")
            # G.add_node(fact[1], group="Fact", color = "red")
            G.add_edge(row['date_start'], fact[0])
            # G.add_edge(fact[0], fact[1])

    # Добавление связей даты записи с сентиментом
    for index, row in df[['date_start', 'sent_index']].iterrows():
        G.add_node(sp.get_most_sentiment([row['sent_index']]), group="Sentiment", color = "pink")
        G.add_edge(row['date_start'], sp.get_most_sentiment([row['sent_index']]))
    return G


def dates_of_Diary_writing(G):
    dates = [key for key, value in G.nodes.data() if value.get('group') == 'Date']
    start = ''
    stop = ''
    for d in dates:
        if not [i for i in G.predecessors(d)]:
            start = d
        elif not [i for i in G.successors(d) if G.nodes.data()[i]['group'] == 'Date']:
            stop = d
    return (start, stop)


def most_visited_places(G):
    res = Counter()
    for key, value in G.nodes.data():
        if value.get('group') == 'Location':
            res[key] = len(G[key])
    return [i[0] for i in Counter(res).most_common(3)]

def most_activity(G):
    res = Counter()
    for key, value in G.nodes.data():
        if value.get('group') == 'Activity':
            res[key] = len(G[key])
    return [i[0] for i in Counter(res).most_common(1)]


def facts_for_annotation(G, gender, most_places):
    '''
    Собирает лист из фактов и его даты упоминания
    '''
    res = Counter()
    for key, value in G.nodes.data():
            if value.get('group') == 'Location':
                res[key] = len(G[key])
    facts = [s for i in Counter(res).keys() for s in G.successors(i)]
    
    res = []
    for fact in facts:
        if wt.get_fact_to_annotation(fact, gender, most_places):
                date = [i for i in G.predecessors(fact) if G.nodes()[i].get('group') == 'Date'][0]
                res.append((date, fact))
    return res

def sentiment_of_date(G):
    sentiment = dict()
    sentiment['positive'] = [date for date in G.predecessors('positive')]
    sentiment['negative'] = [date for date in G.predecessors('negative')]
    sentiment['neutral'] = [date for date in G.predecessors('neutral')]
    return sentiment


def constuct_fact_for_annotation(facts, sentiment, gender, locations):
    '''
    Собирает из отобранных фактов текст для аннотации.
    '''
    prompts = [f'В записях с преимущественно положительной тональностью {wt.get_noun(gender)} {wt.gender_transformer("писал", gender)} как {wt.get_pronoun(gender)}',
               f'Также в дневнике описывается, как {wt.get_pronoun(gender)}']
    

    positive_facts = []
    negative_facts = []
    if sentiment['positive']:
        for date in sentiment['positive']:
            for fact in facts:
                if date == fact[0]:
                    positive_facts.append(f"{wt.transform_fact(locations, fact[1], gender).lower()} ({fact[0]})")
    if sentiment['negative']:
        for date in sentiment['negative']:
            for fact in facts:
                print(fact[1])
                if date == fact[0]:
                    negative_facts.append(f"{wt.transform_fact(locations, fact[1], gender).lower()} ({fact[0]})")
    if sentiment['neutral']:
        for date in sentiment['neutral']:
            for fact in facts:
                print(fact[1])
                if date == fact[0]:
                    negative_facts.append(f"{wt.transform_fact(locations, fact[1], gender).lower()} ({fact[0]})")
    text = ''
    if positive_facts:
        text += f'{prompts[0]} {", ".join(positive_facts)}.'
    if negative_facts:
        text += f'\n\n{prompts[1]} {", ".join(negative_facts)}.'
    return text

def annotation(G, gender, locations):
    dates = dates_of_Diary_writing(G)
    most_places = most_visited_places(G)
    facts = facts_for_annotation(G, gender, most_places)
    sentiment = sentiment_of_date(G)
    activities = most_activity(G)

    # facts = ', '.join([f"{fact[1].lower()} ({fact[0]})" for fact in facts])
    # facts = ''
    if activities:
        act = f'В дневнике также упоминаниется игра в {activities[0]}.'
    else:
        act = ''

    annotation = f'{wt.get_noun(gender).title()} этого дневника {wt.gender_transformer("вести", gender)} его с {dates[0]} по {dates[1]}. Наиболее часто {wt.get_pronoun(gender)} {wt.gender_transformer("описывал", gender)} {wt.inflector(most_places[0], "accs")}, {wt.inflector(most_places[1], "accs")} и {wt.inflector(most_places[2], "accs")}. {act}\n\n{constuct_fact_for_annotation(facts, sentiment, gender, locations)}'

    return annotation