ssa-perin / mtool /codec /norec.py
larkkin's picture
Add code and readme
c45d283
raw
history blame
No virus
5.63 kB
import json
import sys
from graph import Graph
def read(fp, text=None, node_centric=False):
def anchor(node):
anchors = list()
for string in node[1]:
string = string.split(":")
anchors.append({"from": int(string[0]), "to": int(string[1])})
return anchors
for native in json.load(fp):
map = dict()
try:
graph = Graph(native["sent_id"], flavor=1, framework="norec")
graph.add_input(native["text"])
if not node_centric:
top = graph.add_node(top=True)
for opinion in native["opinions"]:
expression = opinion["Polar_expression"]
properties, values = ["Intensity"], [opinion["Intensity"]]
if node_centric:
expression = graph.add_node(
label=opinion["Polarity"],
top=True,
properties=properties,
values=values,
anchors=anchor(expression),
)
else:
expression = graph.add_node(
properties=properties,
values=values,
anchors=anchor(expression),
)
key = tuple(opinion["Polar_expression"][1])
if key in map:
print("we got double expression here", native["sent_id"])
map[key] = expression
graph.add_edge(top.id, expression.id, opinion["Polarity"])
source = opinion["Source"]
if len(source[1]):
key = tuple(source[1])
if key in map:
source = map[key]
else:
source = graph.add_node(
label="Source" if node_centric else None,
anchors=anchor(source),
)
map[key] = source
graph.add_edge(expression.id, source.id, None if node_centric else "Source")
target = opinion["Target"]
if len(target[1]):
key = tuple(target[1])
if key in map:
target = map[key]
else:
target = graph.add_node(
label="Target" if node_centric else None,
anchors=anchor(target),
)
map[key] = target
graph.add_edge(expression.id, target.id, None if node_centric else "Target")
yield graph, None
except Exception as error:
print(
f"codec.norec.read(): ignoring {native}: {error}",
file=sys.stderr,
)
def get_text_span(node, text):
anchored_text = [text[anchor['from']:anchor['to']] for anchor in node.anchors]
anchors = [f"{anchor['from']}:{anchor['to']}" for anchor in node.anchors]
return anchored_text, anchors
def write(graph, input, node_centric=False):
try:
if node_centric:
return write_node_centric(graph, input)
return write_labeled_edge(graph, input)
except Exception as error:
print(f"Problem with decoding sentence {graph.id}")
raise error
def write_node_centric(graph, input):
nodes = {node.id: node for node in graph.nodes}
# create opinions
opinions = {}
for node in graph.nodes:
if node.label in ["Source", "Target"]:
continue
opinions[node.id] = {
"Source": [[], []],
"Target": [[], []],
"Polar_expression": [*get_text_span(node, input)],
"Polarity": node.label,
}
if node.properties is not None and len(node.properties) > 0:
for key, value in zip(node.properties, node.values):
opinions[node.id][key] = value
# add sources & targets
for edge in graph.edges:
if edge.src not in opinions:
continue
target_node = nodes[edge.tgt]
if target_node.label not in ["Source", "Target"]:
continue
anchored_text, anchors = get_text_span(target_node, input)
opinions[edge.src][target_node.label][0] += anchored_text
opinions[edge.src][target_node.label][1] += anchors
sentence = {
"sent_id": graph.id,
"text": input,
"opinions": list(opinions.values()),
}
return sentence
def write_labeled_edge(graph, input):
nodes = {node.id: node for node in graph.nodes}
# create opinions
opinions = {}
for edge in graph.edges:
if edge.lab in ["Source", "Target"]:
continue
node = nodes[edge.tgt]
opinions[node.id] = {
"Source": [[], []],
"Target": [[], []],
"Polar_expression": [*get_text_span(node, input)],
"Polarity": edge.lab,
}
# add sources & targets
for edge in graph.edges:
if edge.lab not in ["Source", "Target"]:
continue
if edge.src not in opinions:
continue
node = nodes[edge.tgt]
anchored_text, anchors = get_text_span(node, input)
opinions[edge.src][edge.lab][0] += anchored_text
opinions[edge.src][edge.lab][1] += anchors
sentence = {
"sent_id": graph.id,
"text": input,
"opinions": list(opinions.values()),
}
return sentence