# Spaces: Runtime error
# (Page-scrape residue kept as a comment; it is not part of the program.)
import collections
import functools
import itertools
import operator

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import iplot
from spacy import displacy
def no_of_tags(data):
    """Count how often each non-empty tag occurs in ``data["entity"]``.

    Args:
        data: Any object whose ``"entity"`` item is an iterable of tags
            (for example a dict of lists or a pandas DataFrame).

    Returns:
        A plain ``dict`` mapping each tag to its number of occurrences, in
        order of first appearance. Empty-string tags are ignored.
    """
    return dict(collections.Counter(tag for tag in data["entity"] if tag != ""))
def list_ents(data_frame):
    """Build character-offset entity spans for the space-joined sentence.

    The offsets assume the sentence text is ``' '.join(data_frame['words'])``,
    i.e. consecutive words are separated by exactly one space.

    Args:
        data_frame: Object whose ``'words'`` and ``'entity'`` items are
            parallel iterables; an empty-string entity means "no tag".

    Returns:
        A list of ``{"start", "end", "label"}`` dicts, one per tagged word.
        ``start`` is inclusive and ``end`` is exclusive, so
        ``text[start:end]`` is exactly the word. ``label`` is the upper-cased
        tag.
    """
    ents_list = []
    offset = 0
    for word, ent in zip(data_frame['words'], data_frame['entity']):
        end = offset + len(word)
        if ent != "":
            # The span stops at the end of the word; the separating space
            # belongs to neither this entity nor the next one.
            ents_list.append({"start": offset, "end": end, "label": ent.upper()})
        # Skip the single space that the join inserts after every word.
        offset = end + 1
    return ents_list
def color_creator(color_data):
    """Turn a flat ``[key, value, key, value, ...]`` sequence into a dict.

    Args:
        color_data: Indexable sequence with a length, holding alternating
            keys and values. The callers in this file pass the ``"NER"``
            entry of a tag-colour configuration, so keys are presumably
            entity names and values colours (verify against the caller's
            data).

    Returns:
        A dict mapping every even-position element to the element that
        follows it, in input order.

    Raises:
        IndexError: If a sequence such as a list has an odd number of
            elements, because the last key has no value after it.
    """
    return {
        color_data[pos]: color_data[pos + 1]
        for pos in range(0, len(color_data), 2)
    }
def options(data_frame):
    """Build the ``options`` dict handed to ``displacy.render``.

    Args:
        data_frame: Flat alternating key/value sequence accepted by
            ``color_creator`` (despite the parameter name, it is passed
            straight through to that function).

    Returns:
        A dict with ``"ents"`` (list of the keys) and ``"colors"`` (the
        key -> value mapping produced by ``color_creator``).
    """
    # Build the mapping once and derive the entity list from it, instead of
    # calling color_creator twice and exposing a dict_keys view.
    colors = color_creator(data_frame)
    return {"ents": list(colors), "colors": colors}
def tag_display(sent, tag_colors):
    """Render one sentence with its entity tags highlighted via displaCy.

    Args:
        sent: Object with parallel ``'words'`` and ``'entity'`` items, as
            consumed by ``list_ents``.
        tag_colors: Mapping whose ``"NER"`` entry is the flat key/colour
            sequence understood by ``options``.

    Returns:
        Whatever ``displacy.render`` returns. Previously the result was
        discarded, so any markup produced by the call was lost to callers.
        NOTE(review): per the spaCy docs this is the HTML markup outside
        Jupyter; inside a notebook the output is displayed directly.
    """
    ex = {
        "text": ' '.join(sent['words']),
        "ents": list_ents(sent),
        "title": None,
    }
    return displacy.render(
        ex, style="ent", manual=True, options=options(tag_colors["NER"])
    )
def bubble_sentence(ls, tag_colors):
    """Plot a bubble chart of tag counts for a single sentence.

    Args:
        ls: Sentence data with an ``"entity"`` item and a
            ``count(axis=0)`` method whose result has ``"clean_words"`` and
            ``"clean_entity"`` entries (presumably a pandas DataFrame).
        tag_colors: Mapping whose ``"NER"`` entry is the flat key/colour
            sequence understood by ``color_creator``.

    Returns:
        None. The figure is shown with ``iplot``.
    """
    tags_data = pd.DataFrame(
        list(no_of_tags(ls).items()), columns=["Entities", "Counts"]
    )
    # NOTE(review): colours are applied to the points by position, in
    # palette order, not looked up per entity name -- confirm this is
    # intended.
    palette = list(color_creator(tag_colors["NER"]).values())
    # Per-column counts are needed twice for the title; compute them once.
    column_counts = ls.count(axis=0)

    data = [
        go.Scatter(
            x=tags_data["Entities"],
            y=tags_data["Counts"],
            mode='markers',
            marker=dict(color=palette, size=tags_data["Counts"] * 40),
        )
    ]
    layout = go.Layout(
        title=f'Words :{column_counts["clean_words"]} Tags :{column_counts["clean_entity"]}',
        xaxis=dict(title='Tags'),
        yaxis=dict(title='Count'),
        hovermode="closest",
    )
    figure = go.Figure(data=data, layout=layout)
    iplot(figure)
def bubble_document(data_frame, tag_colors):
    """Plot a bubble chart of tag counts summed over a whole document.

    Args:
        data_frame: Iterable of sentence objects, each accepted by
            ``no_of_tags``.
        tag_colors: Mapping whose ``"NER"`` entry is the flat key/colour
            sequence understood by ``color_creator``.

    Returns:
        None. The figure is shown with ``iplot``.
    """
    # Accumulate into one Counter. Unlike reduce() without an initial value,
    # this also works when the document contains no sentences.
    doc_tags = collections.Counter()
    for ls in data_frame:
        doc_tags.update(no_of_tags(ls))

    doc_data = pd.DataFrame(
        list(doc_tags.items()), columns=["Entities", "Counts"]
    )
    # NOTE(review): colours are applied to the points by position, in
    # palette order, not looked up per entity name -- confirm this is
    # intended.
    palette = list(color_creator(tag_colors["NER"]).values())

    data = [
        go.Scatter(
            x=doc_data["Entities"],
            y=doc_data["Counts"],
            mode='markers',
            marker=dict(color=palette, size=doc_data["Counts"] * 15),
        )
    ]
    layout = go.Layout(
        title="Distribution of Tags in the Document",
        xaxis=dict(title='Tags'),
        yaxis=dict(title='Count'),
        hovermode="closest",
    )
    figure = go.Figure(data=data, layout=layout)
    iplot(figure)
def line_document(data_frame):
    """Plot the words-per-tag ratio of every sentence as a line chart.

    Args:
        data_frame: Iterable of sentence objects exposing
            ``count(axis=0)`` with ``"clean_words"`` and ``"clean_entity"``
            entries (presumably pandas DataFrames).

    Returns:
        None. The figure is shown with ``iplot``.
    """
    # Collect plain rows first and build the frame once; growing a DataFrame
    # row by row with .loc copies data on every insertion.
    rows = []
    for number, ls in enumerate(data_frame, start=1):
        counts = ls.count(axis=0)
        words, tags = counts["clean_words"], counts["clean_entity"]
        rows.append([number, words, tags, f'{words} : {tags}'])

    line_list = pd.DataFrame(
        rows, columns=["sentence", "words", "tags", "words_to_tag"]
    )
    total_words = line_list["words"].sum()
    total_tags = line_list["tags"].sum()
    # NOTE(review): a sentence with zero tags divides by zero here; nothing
    # in this function guards against that case.
    line_list["words_tag"] = line_list["words"] / line_list["tags"]

    fig = px.line(
        line_list,
        x="sentence",
        y="words_tag",
        markers=True,
        text="words_to_tag",
        template="plotly_dark",
        title=f'Total Words : {total_words} '
              f'Total Entities : {total_tags}',
        line_shape='vh',
    )
    iplot(fig)
def scatter_document(data_frame, tags_data):
    """Build one scatter figure with a marker trace per sentence.

    Every trace has one point per entry of ``tags_data["_ntags_"]``; the
    y value and the marker size come from how often that tag occurs in the
    sentence (0 when it does not occur).

    Args:
        data_frame: Iterable of sentence objects accepted by ``no_of_tags``.
        tags_data: Mapping with ``"NER"`` (flat key/colour sequence for
            ``color_creator``) and ``"_ntags_"`` (the tags to show on the
            x axis).

    Returns:
        The combined ``plotly.graph_objects.Figure``; it is not displayed
        here.
    """
    palette = list(color_creator(tags_data["NER"]).values())
    # The full tag list is the same for every sentence, so build it once.
    all_tags = pd.DataFrame(tags_data["_ntags_"], columns=["entity"])

    traces = []
    # Cycle the palette so that documents with more sentences than colours
    # keep all their sentences; a bare zip() would silently drop the rest.
    # An empty palette still yields no traces.
    for sent, color in zip(data_frame, itertools.cycle(palette)):
        sent_counts = pd.DataFrame(
            list(no_of_tags(sent).items()), columns=["entity", "count"]
        )
        tags_df = all_tags.merge(sent_counts, how="left", on="entity")
        # Tags missing from this sentence come out of the left join as NaN.
        # fillna avoids the legacy ``np.NAN`` alias (NumPy documents the
        # constant as ``np.nan``) and the string-based replace.
        counts = tags_df["count"].fillna(0).astype(int)
        traces.append(
            go.Scatter(
                x=tags_df["entity"],
                y=counts,
                mode='markers',
                marker=dict(color=color, size=counts * 25),
            )
        )

    layout = go.Layout(
        title="Document Tag Distribution",
        xaxis=dict(title='Tags'),
        yaxis=dict(title='Count'),
        hovermode="closest",
        template="plotly_dark",
    )
    return go.Figure(data=traces, layout=layout)