# Source: Hugging Face Space app.py by JoBeer -- commit 6d99166 ("Update app.py")
import gradio as gr
import sentence_transformers
from sentence_transformers import SentenceTransformer
import torch
from sentence_transformers.util import semantic_search
import pandas as pd
# Hugging Face Hub id of the sentence-transformer checkpoint used to embed
# queries. The original author also noted this alternative id:
# gart-labor/eng-distilBERT-se-eclass
MODEL_ID = "JoBeer/eng-distelBERT-se-autogen"

# JSON-lines file holding the searchable corpus; predict() reads its
# "embeddings" column and looks up result fields by column position.
CORPUS_PATH = "corpus.jsonl"

# Both objects are loaded once at import time and shared by every request.
model = SentenceTransformer(MODEL_ID)
corpus = pd.read_json(CORPUS_PATH, lines=True, encoding="utf-8")
# Similarity score below which the best match is reported as not reliable.
_RELIABILITY_THRESHOLD = 0.5

# Tensor form of corpus["embeddings"], built on the first query and then
# reused so the DataFrame column is not converted again for every request.
_corpus_embeddings = None


def _get_corpus_embeddings():
    """Return the corpus embeddings as a tensor, converting them only once."""
    global _corpus_embeddings
    if _corpus_embeddings is None:
        _corpus_embeddings = torch.Tensor(corpus["embeddings"])
    return _corpus_embeddings


def predict(name, description):
    """Find the corpus entry that best matches a property name and description.

    The two inputs are combined into one query string, embedded with the
    module-level ``model`` and compared against the corpus embeddings with
    ``semantic_search``. Only the highest-ranked hit is reported.

    Args:
        name: Name of the property to look up. ``None`` is treated as ``''``.
        description: Free-text description of the property. ``None`` is
            treated as ``''``.

    Returns:
        A 4-tuple ``(preferred_name, definition, irdi, score_output)`` where
        the first three values are read from the matching corpus row and
        ``score_output`` is ``'TRUE (score = <s>)'`` or
        ``'FALSE (score = <s>)'`` depending on whether the similarity score
        reaches the reliability threshold.
    """
    # Guard against missing values so the string concatenation cannot fail.
    name = name or ''
    description = description or ''

    text = 'Description: ' + description + '; Name: ' + name
    query_embedding = model.encode(text, convert_to_tensor=True)

    hits = semantic_search(query_embedding, _get_corpus_embeddings(), top_k=5)
    # hits[0] holds the ranked results for the single query; only the first
    # (best) one is used.
    best_hit = hits[0][0]
    row = best_hit.get('corpus_id')
    score = best_hit.get('score')

    # Columns are addressed by position -- assumes the corpus keeps the
    # definition at index 1, the preferred name at 2 and the IRDI at 4.
    preferred_name = corpus.iloc[row, 2]
    definition = corpus.iloc[row, 1]
    irdi = corpus.iloc[row, 4]

    reliable = 'FALSE' if score < _RELIABILITY_THRESHOLD else 'TRUE'
    score_output = reliable + ' (score = ' + str(score) + ')'
    return preferred_name, definition, irdi, score_output
# Build the Gradio UI: the two text inputs are passed to predict() as
# (name, description) and its four return values fill the four output
# textboxes in the order listed below.
interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Name:", placeholder="Name of the Pump Property", lines=1),
        gr.Textbox(label="Description:", placeholder="Description of the Pump Property", lines=1),
    ],
    outputs=[
        gr.Textbox(label='preferedName'),
        gr.Textbox(label='definition'),
        # Label corrected from 'IDRI' to 'IRDI', the identifier predict() returns.
        gr.Textbox(label='IRDI'),
        gr.Textbox(label='prediction reliable'),
    ],
    #outputs = [gr.Dataframe(row_count = (5, "fixed"), col_count=(3, "fixed"), label="Predictions", headers=['ECLASS preferedName', 'ECLASS IRDI', 'simularity score'])],
    # Each example is a [name, description] pair matching the two inputs.
    examples=[
        ['Device type', 'describing a set of common specific characteristics in products or goods'],
        ['Item type', 'the type of product, an item can be assigned to'],
        ['Nominal power', 'power being consumed by or dissipated within an electric component as a variable'],
        ['Power consumption', 'power that is typically taken from the auxiliary power supply when the device is operating normally'],
    ],
    #theme = 'huggingface',
    title='ECLASS-Property-Search',
    description="This is a semantic search algorithm that maps unknown pump properties to the ECLASS standard. It is created by the GART-labortory ot the cologne university of applied science for the usecase of semantic interoperable asset administration shells (industry 4.0).",
    # The article is raw HTML; its lines stay at column 0 so the string
    # content is unchanged.
    article="""<center><Strong><font size="5em">Functionality and further development of the demo</font></strong></center>
This demo is based on a sentence-transformer <a href="https://huggingface.co/gart-labor/eng-distilBERT-se-eclass">language model</a>, which is trained on a ECLASS specific <a href="https://huggingface.co/datasets/gart-labor/eclassTrainST">dataset</a>. This dataset consists of manually generated paraphrases of ECLASS pump properties. During training the language model learns to map these paraphrases to the eclass pump properties. In future work, this approach can be extended to additional ECLASS properties (e.g. heating systems, ventilation, etc.) and thus a general language model can be trained. To reduce the manual effort, the integration of chatGPT is suitable for the automated creation of the paraphrases required for training.
<br>
<br>
<center><img src='https://imagizer.imageshack.com/img923/6324/WOXHiX.png' width=900p></center>""",
)

# Start serving the interface; this runs as soon as the module is executed.
interface.launch()