Spaces:

JoBeer
/

ECLASS-Search-Pump

Sleeping

App Files Files Community

ECLASS-Search-Pump / app.py

JoBeer

Update app.py

6d99166 11 months ago

raw history blame contribute delete

No virus

3.58 kB

	import gradio as gr
	import sentence_transformers
	from sentence_transformers import SentenceTransformer
	import torch
	from sentence_transformers.util import semantic_search
	import pandas as pd

	# Hugging Face id of the sentence-embedding checkpoint used to encode queries.
	# Alternative checkpoint noted by the original author: gart-labor/eng-distilBERT-se-eclass
	MODEL_ID = 'JoBeer/eng-distelBERT-se-autogen'
	model = SentenceTransformer(MODEL_ID)

	# JSON-lines file (one record per line) holding the searchable corpus,
	# including an "embeddings" column that predict() compares queries against.
	CORPUS_PATH = 'corpus.jsonl'
	corpus = pd.read_json(CORPUS_PATH, lines=True, encoding='utf-8')

	# Similarity scores below this value are reported as not reliable.
	RELIABILITY_THRESHOLD = 0.5

	# Corpus embeddings as a tensor; built on first use and reused afterwards so
	# the DataFrame column is not converted again on every request.
	_corpus_embeddings = None


	def _get_corpus_embeddings():
	    """Return the corpus embedding tensor, building it on the first call."""
	    global _corpus_embeddings
	    if _corpus_embeddings is None:
	        _corpus_embeddings = torch.Tensor(corpus["embeddings"])
	    return _corpus_embeddings


	def predict(name: str, description: str) -> tuple:
	    """Find the corpus entry that best matches a property name and description.

	    Args:
	        name: Name of the property to look up. ``None`` is treated as "".
	        description: Free-text description of the property. ``None`` is
	            treated as "".

	    Returns:
	        A 4-tuple ``(preferred_name, definition, irdi, reliability)``. The
	        first three values are read from the best-matching corpus row;
	        ``reliability`` is ``'FALSE (score = <s>)'`` when the score is below
	        ``RELIABILITY_THRESHOLD`` and ``'TRUE (score = <s>)'`` otherwise.
	    """
	    # Guard against None so building the query text cannot raise TypeError.
	    name = '' if name is None else name
	    description = '' if description is None else description

	    # The query is encoded in the "Description: ...; Name: ..." layout.
	    text = 'Description: ' + description + '; Name: ' + name
	    query_embedding = model.encode(text, convert_to_tensor=True)

	    # semantic_search returns one hit list per query, best hit first; there
	    # is a single query here, so hits[0][0] is the overall best match.
	    # top_k=5 is retained from the original even though only the best hit is used.
	    hits = sentence_transformers.util.semantic_search(
	        query_embedding, _get_corpus_embeddings(), top_k=5
	    )
	    best = hits[0][0]
	    row = best['corpus_id']
	    score = best['score']

	    # Corpus columns are addressed by position:
	    # 2 -> preferred name, 1 -> definition, 4 -> IRDI.
	    preferred_name = corpus.iloc[row, 2]
	    definition = corpus.iloc[row, 1]
	    irdi = corpus.iloc[row, 4]

	    reliable = 'FALSE' if score < RELIABILITY_THRESHOLD else 'TRUE'
	    score_output = f'{reliable} (score = {score})'

	    return preferred_name, definition, irdi, score_output

	# Two single-line text inputs, passed to predict() as (name, description).
	input_boxes = [
	    gr.Textbox(label="Name:", placeholder="Name of the Pump Property", lines=1),
	    gr.Textbox(label="Description:", placeholder="Description of the Pump Property", lines=1),
	]

	# One text output per element of the 4-tuple returned by predict().
	# The third label was misspelled 'IDRI'; the value shown is the ECLASS IRDI.
	output_boxes = [
	    gr.Textbox(label='preferedName'),
	    gr.Textbox(label='definition'),
	    gr.Textbox(label='IRDI'),
	    gr.Textbox(label='prediction reliable'),
	]
	# Alternative tabular output kept from the original for reference:
	#outputs = [gr.Dataframe(row_count = (5, "fixed"), col_count=(3, "fixed"), label="Predictions", headers=['ECLASS preferedName', 'ECLASS IRDI', 'simularity score'])],

	# Clickable sample queries, each given as [name, description].
	example_queries = [
	    ['Device type', 'describing a set of common specific characteristics in products or goods'],
	    ['Item type', 'the type of product, an item can be assigned to'],
	    ['Nominal power', 'power being consumed by or dissipated within an electric component as a variable'],
	    ['Power consumption', 'power that is typically taken from the auxiliary power supply when the device is operating normally'],
	]

	interface = gr.Interface(
	    fn=predict,
	    inputs=input_boxes,
	    outputs=output_boxes,
	    examples=example_queries,
	    #theme = 'huggingface',
	    title='ECLASS-Property-Search',
	    description="This is a semantic search algorithm that maps unknown pump properties to the ECLASS standard. It is created by the GART-laboratory of the cologne university of applied science for the usecase of semantic interoperable asset administration shells (industry 4.0).",
	    article="""<center><Strong><font size="5em">Functionality and further development of the demo</font></strong></center>
	This demo is based on a sentence-transformer <a href="https://huggingface.co/gart-labor/eng-distilBERT-se-eclass">language model</a>, which is trained on a ECLASS specific <a href="https://huggingface.co/datasets/gart-labor/eclassTrainST">dataset</a>. This dataset consists of manually generated paraphrases of ECLASS pump properties. During training the language model learns to map these paraphrases to the eclass pump properties. In future work, this approach can be extended to additional ECLASS properties (e.g. heating systems, ventilation, etc.) and thus a general language model can be trained. To reduce the manual effort, the integration of chatGPT is suitable for the automated creation of the paraphrases required for training.
	<br>
	<br>
	<center><img src='https://imagizer.imageshack.com/img923/6324/WOXHiX.png' width=900p></center>""",
	)

	# Start the web app when the script is executed.
	interface.launch()