|
""" |
|
Utilities for the verbalisation of an ontology. |
|
|
|
Examples of possible use cases for ontology verbalisation:
|
- Summarising the features provided by the ontology (doc) |
|
- Using a LM to extract competency questions from the ontology. |
|
- Asking a LM if the ontology can be used for certain requirements. |
|
|
|
""" |
|
import logging |
|
from typing import List |
|
|
|
import rdflib |
|
from rdflib import Graph |
|
from rdflib.namespace import RDF, RDFS, OWL |
|
|
|
from ontochat.queries import NE_QUERY |
|
|
|
# Module-level logger used by the verbalisation functions in this file; the
# logger name is given explicitly rather than derived from ``__name__``.
logger = logging.getLogger("ontochat.verbaliser")
|
|
|
|
|
def verbalise_ontology(ontology_path: str, onto_about: str, onto_desc: str):
    """
    Produce a plain-text verbalisation of an ontology.

    The ontology is parsed with `rdflib`; its classes, named entities and
    object properties are verbalised separately and the three lists are then
    collated, together with the given descriptions, into a single string.

    Parameters
    ----------
    ontology_path : str
        Path to the ontology encoded in a format that is readable by `rdflib`.
    onto_about : str
        A short description of the ontology, if documentation is missing.
    onto_desc : str
        An extended description of the ontology to provide more context.

    Returns
    -------
    verbalisation : str
        The textual verbalisation assembled by `collate_verbalisations`.

    """
    g = Graph()
    g.parse(ontology_path)

    # When a resource carries several rdfs:label / rdfs:comment values, the
    # one yielded last by the graph overrides the earlier ones.
    label_dict = {s: str(o) for s, _, o in g.triples((None, RDFS.label, None))}
    comment_dict = {s: str(o) for s, _, o in g.triples((None, RDFS.comment, None))}

    def label_fn(x):
        # Prefer the rdfs:label; otherwise fall back to the text after the
        # last "/" of the IRI.
        # NOTE(review): IRIs that use a "#" fragment keep the part before the
        # "#" as well (e.g. "onto#Person") -- confirm whether that is wanted.
        if x in label_dict:
            return label_dict[x]
        return str(x).split("/")[-1]

    logger.info("Class verbalisation: start")
    class_vrbs = verbalise_classes(g, label_fn, comment_dict)
    logger.info("Class verbalisation: found %d classes", len(class_vrbs))

    logger.info("Named entity verbalisation: start")
    nament_vrbs = verbalise_named_entities(g, label_fn, comment_dict)
    # Report the size of the list that was just built, not the class list.
    logger.info("Named entity verbalisation: found %d entities", len(nament_vrbs))

    logger.info("Relation verbalisation: start")
    relat_vrbs = verbalise_relations(g, label_fn, comment_dict)
    logger.info("Relation verbalisation: found %d relations", len(relat_vrbs))

    # Pass by keyword: collate_verbalisations takes relations *before* named
    # entities, so a positional call in the order used above would print each
    # list under the other's heading.
    return collate_verbalisations(
        class_verbalisations=class_vrbs,
        relation_verbalisations=relat_vrbs,
        nentities_verbalisations=nament_vrbs,
        onto_about=onto_about,
        onto_desc=onto_desc,
    )
|
|
|
|
|
def create_relation_dict(graph, relation):
    """
    Group the objects (tails) of the given relation by their subject.

    Every ``(subject, relation, object)`` triple of the graph is visited;
    triples whose object is a blank node are skipped. The result maps each
    remaining subject to the list of its objects, in the order in which the
    graph yields them.
    """
    tails_by_head = {}
    for head, _, tail in graph.triples((None, relation, None)):
        if not isinstance(tail, rdflib.term.BNode):
            tails_by_head.setdefault(head, []).append(tail)
    return tails_by_head
|
|
|
|
|
def verbalise_classes(graph: rdflib.Graph, label_fn, comment_dict: dict):
    """
    Verbalise every resource typed as ``owl:Class`` in the graph.

    Each class yields one string made of its label, optionally followed by
    `` (subconcept of A, B)`` when it has named superclasses, and by
    ``: <comment>`` when a comment is available.

    Parameters
    ----------
    graph : rdflib.Graph
        The parsed ontology.
    label_fn : callable
        Maps a resource to the text used to mention it.
    comment_dict : dict
        Maps resources to their comment text.

    Returns
    -------
    class_verbalisations : list of str
        One verbalisation per class, in the order yielded by the graph.

    Raises
    ------
    NotImplementedError
        If some resource is the subject of an ``rdfs:subClassOf`` triple
        (with a non-blank-node object) without being typed as ``owl:Class``.

    """
    classes = [s for s, _, _ in graph.triples((None, RDF.type, OWL.Class))]
    subclasses = create_relation_dict(graph, relation=RDFS.subClassOf)
    logger.info("Found: %d classes, %d subclasses", len(classes), len(subclasses))

    class_verbalisations = []
    for base_class in classes:
        vrbn = f"{label_fn(base_class)}"

        if base_class in subclasses:
            parents = ", ".join([label_fn(u) for u in subclasses[base_class]])
            vrbn += f" (subconcept of {parents})"

        if base_class in comment_dict:
            vrbn += f": {comment_dict[base_class]}"

        class_verbalisations.append(vrbn)

    # Every declared class was verbalised above; a set keeps the membership
    # test below O(1) instead of scanning a list for each subclass.
    verbalised = set(classes)
    for subclass in subclasses:
        if subclass not in verbalised:
            # Subclass axioms on resources that are not declared owl:Class
            # are not handled yet.
            raise NotImplementedError(subclass)

    return class_verbalisations
|
|
|
|
|
def verbalise_named_entities(graph: rdflib.Graph, label, comment_dict: dict):
    """
    Verbalise the named entities returned by running ``NE_QUERY`` on the graph.

    Each result row is unpacked as an (entity, class) pair and rendered as
    ``"<entity> is an instance of class <class>."`` using ``label`` for both.
    ``comment_dict`` is currently unused.

    Note: TODO append NE comment (if available) to each named entity.
    Note: FIXME still, a named entity can have more than 1 parent class;
    only the class from the last row seen for an entity is kept.
    """
    entity_types = {}
    for entity, entity_class in graph.query(NE_QUERY):
        entity_types[entity] = entity_class

    return [
        f"{label(entity)} is an instance of class {label(entity_class)}."
        for entity, entity_class in entity_types.items()
    ]
|
|
|
|
|
def verbalise_relations(graph: rdflib.Graph, label, comment_dict: dict):
    """
    Verbalise every resource typed as ``owl:ObjectProperty`` in the graph.

    Each property yields one string made of its label, optionally followed by
    its super-properties, its comment, and sentences listing its possible
    domains and ranges.

    Parameters
    ----------
    graph : rdflib.Graph
        The parsed ontology.
    label : callable
        Maps a resource to the text used to mention it.
    comment_dict : dict
        Maps resources to their comment text.

    Returns
    -------
    relation_verbalisations : list of str
        One verbalisation per object property, in the order yielded by the
        graph.

    Raises
    ------
    NotImplementedError
        If some resource is the subject of an ``rdfs:subPropertyOf`` triple
        (with a non-blank-node object) without being typed as
        ``owl:ObjectProperty``.

    """
    properties = [s for s, _, _ in graph.triples(
        (None, RDF.type, OWL.ObjectProperty))]
    subprops = create_relation_dict(graph, relation=RDFS.subPropertyOf)
    domains = create_relation_dict(graph, relation=RDFS.domain)
    ranges = create_relation_dict(graph, relation=RDFS.range)

    relation_verbalisations = []
    for base_prop in properties:
        verbalisation = f"{label(base_prop)}"

        if base_prop in subprops:
            verbalisation += " (subproperty of "
            # ", and " (with the trailing space) so that the names do not run
            # together as "a, andb".
            verbalisation += ", and ".join([label(u) for u in subprops[base_prop]])
            verbalisation += ")"

        if base_prop in comment_dict:
            verbalisation += f": {comment_dict[base_prop]}"

        if base_prop in domains:
            verbalisation += " The domain of this relation can be: "
            verbalisation += ", or ".join([label(u) for u in domains[base_prop]])
            verbalisation += "."

        if base_prop in ranges:
            verbalisation += " The range of this relation can be: "
            verbalisation += ", or ".join([label(u) for u in ranges[base_prop]])
            verbalisation += "."

        relation_verbalisations.append(verbalisation)

    # Every collected property was verbalised above; a set keeps the
    # membership test below O(1) instead of scanning a list each time.
    verbalised = set(properties)
    for subprop in subprops:
        if subprop not in verbalised:
            # Sub-property axioms on resources that are not declared
            # owl:ObjectProperty are not handled yet.
            raise NotImplementedError(subprop)

    return relation_verbalisations
|
|
|
|
|
def collate_verbalisations(class_verbalisations: List[str],
                           relation_verbalisations: List[str],
                           nentities_verbalisations: List[str],
                           onto_about: str, onto_desc: str,
                           ):
    """
    Assemble the final ontology verbalisation.

    The text starts with the ontology description line, followed by three
    bulleted sections separated by a blank line: classes, named entities
    (individuals) and relations, in that order. Every bullet is rendered as
    ``"- <item>"`` on its own line.
    """
    def bullet_section(heading, items):
        # Heading (already newline-terminated) followed by one bullet per item.
        return heading + "".join(f"- {item}\n" for item in items)

    description = f"Ontology description: {onto_about}. {onto_desc}"
    sections = [
        bullet_section("The main classes of the ontology are listed below:\n",
                       class_verbalisations),
        bullet_section("The main named entities (individuals) are listed below:\n",
                       nentities_verbalisations),
        bullet_section("The main relations of the ontology are listed below:\n",
                       relation_verbalisations),
    ]
    return description + "\n" + "\n".join(sections)
|
|