"""Gradio demo that maps a free-text pump property to an ECLASS property.

The script loads a sentence-transformer model and a corpus of ECLASS
properties with embeddings stored in ``corpus.jsonl``, then serves a small
web UI that returns the best-matching corpus entry for a name/description
pair.
"""

import gradio as gr
import pandas as pd
import sentence_transformers
import torch
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import semantic_search

# Alternative checkpoint noted in the original source:
# gart-labor/eng-distilBERT-se-eclass
model = SentenceTransformer('JoBeer/eng-distelBERT-se-autogen')
corpus = pd.read_json('corpus.jsonl', lines=True, encoding='utf-8')

# The corpus never changes after start-up, so the embedding matrix is built
# once here instead of on every request.
# NOTE(review): assumes every "embeddings" cell is an equally long list of
# floats -- confirm against corpus.jsonl.
corpus_embeddings = torch.tensor(
    corpus["embeddings"].tolist(), dtype=torch.float32
)

# Positional columns of ``corpus`` read by ``predict``.
# NOTE(review): the meaning of each index is taken from the names the values
# are returned under; verify against the actual column order of corpus.jsonl.
_DEFINITION_COL = 1
_PREFERRED_NAME_COL = 2
_IRDI_COL = 4

# Number of hits requested from the search; only the best one is displayed.
TOP_K = 5
# Scores below this value are reported as not reliable.
RELIABILITY_THRESHOLD = 0.5


def predict(name: str, description: str) -> tuple:
    """Return the corpus entry that best matches the given property.

    Args:
        name: Name of the property to look up.
        description: Free-text description of the property.

    Returns:
        A 4-tuple ``(preferred_name, definition, irdi, score_output)`` where
        the first three values are read from the best-matching corpus row and
        ``score_output`` is ``'TRUE (score = ...)'`` or
        ``'FALSE (score = ...)'`` depending on whether the score reaches
        ``RELIABILITY_THRESHOLD``.
    """
    # The query text layout is part of the model input and is kept verbatim.
    text = 'Description: ' + description + '; Name: ' + name
    query_embedding = model.encode(text, convert_to_tensor=True)

    hits = semantic_search(query_embedding, corpus_embeddings, top_k=TOP_K)
    # One query was submitted, so hits[0] holds its results; the first entry
    # is the one the original code displayed as the prediction.
    best_hit = hits[0][0]
    row = best_hit['corpus_id']
    score = best_hit['score']

    preferred_name = corpus.iloc[row, _PREFERRED_NAME_COL]
    definition = corpus.iloc[row, _DEFINITION_COL]
    irdi = corpus.iloc[row, _IRDI_COL]

    reliable = 'FALSE' if score < RELIABILITY_THRESHOLD else 'TRUE'
    score_output = f'{reliable} (score = {score})'

    return preferred_name, definition, irdi, score_output


_DESCRIPTION = (
    "This is a semantic search algorithm that maps unknown pump properties to "
    "the ECLASS standard. It is created by the GART-labortory ot the cologne "
    "university of applied science for the usecase of semantic interoperable "
    "asset administration shells (industry 4.0)."
)

_ARTICLE = (
    "Functionality and further development of the demo "
    "This demo is based on a sentence-transformer language model, which is "
    "trained on a ECLASS specific dataset. This dataset consists of manually "
    "generated paraphrases of ECLASS pump properties. During training the "
    "language model learns to map these paraphrases to the eclass pump "
    "properties. In future work, this approach can be extended to additional "
    "ECLASS properties (e.g. heating systems, ventilation, etc.) and thus a "
    "general language model can be trained. To reduce the manual effort, the "
    "integration of chatGPT is suitable for the automated creation of the "
    "paraphrases required for training.\n"
    "\n"
)

interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(
            label="Name:",
            placeholder="Name of the Pump Property",
            lines=1,
        ),
        gr.Textbox(
            label="Description:",
            placeholder="Description of the Pump Property",
            lines=1,
        ),
    ],
    outputs=[
        gr.Textbox(label='preferedName'),
        gr.Textbox(label='definition'),
        # Label corrected from 'IDRI' to match the IRDI value shown here.
        gr.Textbox(label='IRDI'),
        gr.Textbox(label='prediction reliable'),
    ],
    # Alternative tabular output kept from the original source:
    # outputs = [gr.Dataframe(row_count = (5, "fixed"), col_count=(3, "fixed"), label="Predictions", headers=['ECLASS preferedName', 'ECLASS IRDI', 'simularity score'])],
    examples=[
        [
            'Device type',
            'describing a set of common specific characteristics in products or goods',
        ],
        [
            'Item type',
            'the type of product, an item can be assigned to',
        ],
        [
            'Nominal power',
            'power being consumed by or dissipated within an electric component as a variable',
        ],
        [
            'Power consumption',
            'power that is typically taken from the auxiliary power supply when the device is operating normally',
        ],
    ],
    # theme = 'huggingface',
    title='ECLASS-Property-Search',
    description=_DESCRIPTION,
    article=_ARTICLE,
)

interface.launch()