Spaces:
Runtime error
Runtime error
File size: 5,440 Bytes
06924e0 b6974ae 06924e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import collections
import functools
import operator
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import iplot
from spacy import displacy
def no_of_tags(data):
    """Count how often each non-empty entity tag occurs in ``data["entity"]``.

    Args:
        data: Mapping-like object (for example a pandas DataFrame or a dict)
            whose ``"entity"`` entry is an iterable of tag values.

    Returns:
        dict: Tag -> number of occurrences, keyed in first-seen order.
        Entries equal to the empty string are skipped.
    """
    # Counter replaces the manual "insert or increment" bookkeeping and keeps
    # first-seen key order; convert back so callers still receive a plain dict.
    return dict(collections.Counter(tag for tag in data["entity"] if tag != ""))
def list_ents(data_frame):
    """Build displaCy-style entity spans for one tokenised sentence.

    Character offsets refer to the text obtained by joining
    ``data_frame['words']`` with single spaces.

    Args:
        data_frame: Mapping-like object with parallel ``'words'`` and
            ``'entity'`` iterables. An empty-string entity marks a word
            without a tag.

    Returns:
        list[dict]: One ``{"start", "end", "label"}`` dict per tagged word,
        where ``end`` is exclusive and ``label`` is the upper-cased tag.
    """
    spans = []
    offset = 0
    for word, ent in zip(data_frame['words'], data_frame['entity']):
        word_end = offset + len(word)
        if ent != "":
            # ``end`` is exclusive and stops at the last character of the
            # word; the separating space is not part of the entity.
            spans.append({"start": offset, "end": word_end, "label": ent.upper()})
        # Advance past the word and the single space that follows it.
        offset = word_end + 1
    return spans
def color_creator(color_data):
    """Turn a flat ``[key, value, key, value, ...]`` sequence into a dict.

    Args:
        color_data: Indexable sequence in which every even position holds a
            key and the following odd position holds its value.

    Returns:
        dict: Mapping of each even-position item to the item right after it.
        When a key repeats, the last pair wins.

    Raises:
        IndexError: If a list/tuple input has an odd number of items, because
            the final key has no partner.
    """
    return {
        color_data[pos]: color_data[pos + 1]
        for pos in range(0, len(color_data), 2)
    }
def options(data_frame):
    """Assemble the ``options`` dict passed to ``displacy.render``.

    Args:
        data_frame: Flat ``[tag, color, tag, color, ...]`` sequence; it is
            forwarded unchanged to ``color_creator``.

    Returns:
        dict: ``{"ents": <keys view of the tag -> color dict>,
        "colors": <tag -> color dict>}``.
    """
    # Build the tag -> color mapping once; both entries describe the same
    # palette, so there is no need to construct it twice.
    palette = color_creator(data_frame)
    return {"ents": palette.keys(), "colors": palette}
def tag_display(sent, tag_colors):
    """Render one sentence with its entity tags highlighted by displaCy.

    The sentence text is ``sent['words']`` joined with single spaces, and the
    entity spans come from ``list_ents(sent)``; displaCy is run in manual
    mode, so no spaCy pipeline is involved.

    Args:
        sent: Mapping-like object for one sentence, providing ``'words'`` and
            whatever ``list_ents`` reads from it.
        tag_colors: Mapping whose ``"NER"`` entry is the flat tag/color
            sequence understood by ``options``.

    Returns:
        The value returned by ``displacy.render``. Per the spaCy docs this is
        the rendered HTML markup; inside a Jupyter notebook displaCy displays
        the markup itself. Previously the result was discarded, which left
        non-notebook callers with nothing to show.
    """
    doc = {"text": ' '.join(sent['words']), "ents": list_ents(sent), "title": None}
    return displacy.render(doc, style="ent", manual=True, options=options(tag_colors["NER"]))
def bubble_sentence(ls, tag_colors):
    """Show a bubble chart of the entity-tag counts of a single sentence.

    Args:
        ls: pandas DataFrame for one sentence. Its ``"entity"`` column is
            counted via ``no_of_tags``; the non-null counts of the
            ``"clean_words"`` and ``"clean_entity"`` columns go into the title.
        tag_colors: Mapping whose ``"NER"`` entry is the flat tag/color
            sequence understood by ``color_creator``.

    Returns:
        None. The figure is displayed with ``iplot``.
    """
    tags_data = pd.DataFrame(list(no_of_tags(ls).items()), columns=["Entities", "Counts"])

    # Colour every bubble by its own tag. Handing Plotly the whole palette
    # pairs the i-th bubble with the i-th palette entry, whichever tag that
    # bubble actually shows.
    palette = color_creator(tag_colors["NER"])
    fallback_color = "#7f7f7f"  # neutral grey for tags missing from the palette
    # NOTE(review): list_ents upper-cases labels before handing them to
    # displaCy, so palette keys are presumably upper-case; the exact tag is
    # tried first and its upper-cased form second. Confirm against the data.
    bubble_colors = [
        palette.get(tag, palette.get(str(tag).upper(), fallback_color))
        for tag in tags_data["Entities"]
    ]

    data = [
        go.Scatter(
            x=tags_data["Entities"],
            y=tags_data["Counts"],
            mode='markers',
            marker=dict(color=bubble_colors, size=tags_data["Counts"] * 40),
        )
    ]

    # Non-null cell count per column, computed once for both title fields.
    column_counts = ls.count(axis=0)
    layout = go.Layout(
        title=f'Words :{column_counts["clean_words"]} Tags :{column_counts["clean_entity"]}',
        xaxis=dict(title='Tags'),
        yaxis=dict(title='Count'),
        hovermode="closest",
    )
    figure = go.Figure(data=data, layout=layout)
    iplot(figure)
def bubble_document(data_frame, tag_colors):
    """Show a bubble chart of entity-tag counts summed over a whole document.

    Args:
        data_frame: Iterable of per-sentence objects accepted by
            ``no_of_tags`` (each exposes an ``"entity"`` entry).
        tag_colors: Mapping whose ``"NER"`` entry is the flat tag/color
            sequence understood by ``color_creator``.

    Returns:
        None. The figure is displayed with ``iplot``.
    """
    # Accumulate into a Counter that starts empty, so a document without
    # sentences yields an empty chart instead of the TypeError that
    # ``functools.reduce`` raises on an empty sequence with no initial value.
    doc_tags = collections.Counter()
    for ls in data_frame:
        doc_tags.update(no_of_tags(ls))
    doc_data = pd.DataFrame(list(doc_tags.items()), columns=["Entities", "Counts"])

    # Colour every bubble by its own tag. Handing Plotly the whole palette
    # pairs the i-th bubble with the i-th palette entry, whichever tag that
    # bubble actually shows.
    palette = color_creator(tag_colors["NER"])
    fallback_color = "#7f7f7f"  # neutral grey for tags missing from the palette
    # NOTE(review): list_ents upper-cases labels before handing them to
    # displaCy, so palette keys are presumably upper-case; the exact tag is
    # tried first and its upper-cased form second. Confirm against the data.
    bubble_colors = [
        palette.get(tag, palette.get(str(tag).upper(), fallback_color))
        for tag in doc_data["Entities"]
    ]

    data = [
        go.Scatter(
            x=doc_data["Entities"],
            y=doc_data["Counts"],
            mode='markers',
            marker=dict(color=bubble_colors, size=doc_data["Counts"] * 15),
        )
    ]
    layout = go.Layout(
        title="Distribution of Tags in the Document",
        xaxis=dict(title='Tags'),
        yaxis=dict(title='Count'),
        hovermode="closest",
    )
    figure = go.Figure(data=data, layout=layout)
    iplot(figure)
def line_document(data_frame):
    """Plot, per sentence, the ratio of counted words to counted entity tags.

    Args:
        data_frame: Iterable of per-sentence pandas DataFrames that have
            ``"clean_words"`` and ``"clean_entity"`` columns; the non-null
            cell counts of those two columns are used.

    Returns:
        None. The figure is displayed with ``iplot``.
    """
    rows = []
    total_words, total_tags = 0, 0
    for sentence_no, ls in enumerate(data_frame, start=1):
        # One count() per sentence instead of six, and rows are collected in
        # a list so the frame is built once rather than grown row by row.
        column_counts = ls.count(axis=0)
        words = column_counts["clean_words"]
        tags = column_counts["clean_entity"]
        rows.append([sentence_no, words, tags, f'{words} : {tags}'])
        total_words += words
        total_tags += tags

    line_list = pd.DataFrame(rows, columns=["sentence", "words", "tags", "words_to_tag"])
    # A sentence with zero tags gives a non-finite ratio here (inf or NaN).
    line_list["words_tag"] = line_list["words"] / line_list["tags"]

    fig = px.line(line_list, x="sentence", y="words_tag", markers=True, text="words_to_tag",
                  template="plotly_dark",
                  title=f'Total Words : {total_words} '
                        f'Total Entities : {total_tags}', line_shape='vh')
    iplot(fig)
def scatter_document(data_frame, tags_data):
    """Build one figure that overlays a bubble trace of tag counts per sentence.

    Args:
        data_frame: Iterable of per-sentence objects accepted by
            ``no_of_tags`` (each exposes an ``"entity"`` entry).
        tags_data: Mapping with two entries: ``"NER"``, the flat tag/color
            sequence understood by ``color_creator``, and ``"_ntags_"``, the
            entity names that form the shared x axis.

    Returns:
        plotly.graph_objects.Figure: One marker trace per sentence on a
        dark-themed layout titled "Document Tag Distribution". The figure is
        returned, not displayed.
    """
    import itertools

    trace_colors = list(color_creator(tags_data["NER"]).values())
    # The x axis is identical for every sentence, so build it once.
    all_tags = pd.DataFrame(tags_data["_ntags_"], columns=["entity"])

    traces = []
    # Cycle through the palette: a plain zip() stops at the shorter input and
    # silently dropped every sentence beyond the number of colours.
    for sent, color in zip(data_frame, itertools.cycle(trace_colors)):
        sent_counts = pd.DataFrame(list(no_of_tags(sent).items()), columns=["entity", "count"])
        tags_df = pd.merge(left=all_tags, right=sent_counts, how="left", on="entity")
        # Tags absent from this sentence come out of the left merge as NaN;
        # count them as 0. (The upper-case ``np.NAN`` alias used before is
        # not part of the NumPy 2 namespace.)
        tags_df["count"] = tags_df["count"].fillna(0).astype(int)
        traces.append(
            go.Scatter(
                x=tags_df["entity"],
                y=tags_df["count"],
                mode='markers',
                marker=dict(color=color, size=tags_df["count"] * 25),
            )
        )

    layout = go.Layout(
        title="Document Tag Distribution",
        xaxis=dict(title='Tags'),
        yaxis=dict(title='Count'),
        hovermode="closest",
        template="plotly_dark",
    )
    return go.Figure(data=traces, layout=layout)
|