File size: 3,577 Bytes
48d28d5
 
 
6a00324
745b80e
9e0ab0f
 
6d99166
48d28d5
98b274f
9e0ab0f
745b80e
 
 
96664a2
2ebfb5d
745b80e
 
 
 
 
 
 
16e66e6
04e7c76
16e66e6
04e7c76
 
16e66e6
 
10520fd
c1157cb
24890d4
16e66e6
f59d886
b5bcb4b
f0dea8f
 
92abcaa
ab39a91
f1da055
0ac4d87
4185abb
6d16879
f1da055
41c8dfe
dc2b889
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import gradio as gr
import sentence_transformers
from sentence_transformers import SentenceTransformer
import torch
from sentence_transformers.util import semantic_search
import pandas as pd

# Sentence-transformer checkpoint fine-tuned for ECLASS pump-property search.
# The commented alternative is the related gart-labor checkpoint.
model = SentenceTransformer('JoBeer/eng-distelBERT-se-autogen') #gart-labor/eng-distilBERT-se-eclass

# ECLASS corpus, one JSON record per line; the 'embeddings' column holds
# pre-computed sentence embeddings used for semantic search in predict().
# NOTE(review): predict() indexes columns 1, 2 and 4 positionally
# (definition, preferedName, IRDI) — verify against corpus.jsonl ordering.
corpus = pd.read_json('corpus.jsonl', lines = True, encoding = 'utf-8')

def predict(name, description, threshold=0.5):
    """Map a pump property to its closest ECLASS property via semantic search.

    Parameters
    ----------
    name : str
        Name of the pump property.
    description : str
        Free-text description of the pump property.
    threshold : float, optional
        Minimum similarity score for the match to be flagged as reliable
        (default 0.5, the original hard-coded cut-off).

    Returns
    -------
    tuple[str, str, str, str]
        (preferred name, definition, IRDI, reliability string) of the best hit.
    """
    # Concatenate the fields in the same "Description: ...; Name: ..." format
    # the model was trained on.
    query = 'Description: ' + description + '; Name: ' + name
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Pre-computed corpus embeddings from the 'embeddings' column.
    corpus_embeddings = torch.Tensor(corpus["embeddings"])

    # Retrieve the 5 nearest corpus entries; only the best hit is reported.
    # Use the directly imported semantic_search (the original redundantly
    # went through sentence_transformers.util).
    hits = semantic_search(query_embedding, corpus_embeddings, top_k=5)
    best = hits[0][0]
    idx = best.get('corpus_id')  # single lookup instead of four repeats

    prefered_name = corpus.iloc[idx, 2]
    definition = corpus.iloc[idx, 1]
    irdi = corpus.iloc[idx, 4]
    score = best.get('score')

    # Flag predictions below the similarity threshold as unreliable.
    reliable = 'FALSE' if score < threshold else 'TRUE'
    score_output = reliable + ' (score = ' + str(score) + ')'

    return prefered_name, definition, irdi, score_output

# Gradio UI: two free-text inputs (property name and description) mapped by
# predict() to four text outputs (preferred name, definition, IRDI, reliability).
interface = gr.Interface(fn = predict, 
            inputs = [gr.Textbox(label="Name:", placeholder="Name of the Pump Property", lines=1), gr.Textbox(label="Description:", placeholder="Description of the Pump Property", lines=1)], 
            # Label fixed from the typo 'IDRI' to 'IRDI', matching the ECLASS
            # identifier value returned by predict().
            outputs = [gr.Textbox(label = 'preferedName'),gr.Textbox(label = 'definition'), gr.Textbox(label = 'IRDI'),gr.Textbox(label = 'prediction reliable')],
            # Alternative tabular output kept for reference:
            #outputs = [gr.Dataframe(row_count = (5, "fixed"), col_count=(3, "fixed"), label="Predictions", headers=['ECLASS preferedName', 'ECLASS IRDI', 'similarity score'])],
            examples = [['Device type', 'describing a set of common specific characteristics in products or goods'], ['Item type','the type of product, an item can be assigned to'], 
                        ['Nominal power','power being consumed by or dissipated within an electric component as a variable'], ['Power consumption', 'power that is typically taken from the auxiliary power supply when the device is operating normally']],
                         #theme = 'huggingface',
            title = 'ECLASS-Property-Search', 
            # Typos fixed: "labortory ot the cologne university of applied science" / "usecase".
            description = "This is a semantic search algorithm that maps unknown pump properties to the ECLASS standard. It is created by the GART laboratory of the Cologne University of Applied Sciences for the use case of semantic interoperable asset administration shells (industry 4.0).", 
            article = """<center><Strong><font size="5em">Functionality and further development of the demo</font></strong></center> 
            This demo is based on a sentence-transformer <a href="https://huggingface.co/gart-labor/eng-distilBERT-se-eclass">language model</a>, which is trained on a ECLASS specific <a href="https://huggingface.co/datasets/gart-labor/eclassTrainST">dataset</a>. This dataset consists of manually generated paraphrases of ECLASS pump properties. During training the language model learns to map these paraphrases to the eclass pump properties. In future work, this approach can be extended to additional ECLASS properties (e.g. heating systems, ventilation, etc.) and thus a general language model can be trained. To reduce the manual effort, the integration of chatGPT is suitable for the automated creation of the paraphrases required for training.
            <br>
            <br>
            <center><img src='https://imagizer.imageshack.com/img923/6324/WOXHiX.png' width=900p></center>""")

# Start the Gradio server (blocking call).
interface.launch()