Spaces:

mboth
/

docker-aas

Runtime error

App Files Files Community

docker-aas / app /predict_one_aas.py

mboth

Upload 5 files

c2e327f over 1 year ago

raw

history blame

No virus

8.27 kB

	from sentence_transformers import SentenceTransformer, util
	import json
	import time
	import pandas as pd
	import numpy as np
	import pickle

	import chromadb
	from chromadb.config import Settings
	from chromadb.utils import embedding_functions
	from chromadb.db.clickhouse import NoDatapointsException

	# Lookup tables that bucket a raw datatype / unit string into the coarse
	# category used to build the "Metalabel" filter. All keys are lower-case
	# because the incoming value is lower-cased before the lookup.
	_DATATYPE_CATEGORIES = {
	    'boolean': 'BOOLEAN', 'string': 'STRING', 'string_translatable': 'STRING',
	    'translatable_string': 'STRING', 'non_translatable_string': 'STRING',
	    'date': 'DATE', 'data_time': 'DATE', 'uri': 'URI', 'int': 'INT',
	    'int_measure': 'INT', 'int_currency': 'INT', 'integer': 'INT',
	    'real': 'REAL', 'real_measure': 'REAL', 'real_currency': 'REAL',
	    'enum_code': 'ENUM_CODE', 'enum_int': 'ENUM_CODE',
	    'enum_real': 'ENUM_CODE', 'enum_rational': 'ENUM_CODE',
	    'enum_boolean': 'ENUM_CODE', 'enum_string': 'ENUM_CODE',
	    'enum_reference': 'ENUM_CODE', 'enum_instance': 'ENUM_CODE',
	    'set(b1,b2)': 'SET', 'constrained_set(b1,b2,cmn,cmx)': 'SET',
	    'set [0,?]': 'SET', 'set [1,?]': 'SET', 'set [1, ?]': 'SET',
	    'nan': 'NaN', 'media_type': 'LARGE_OBJECT_TYPE',
	}

	_UNIT_CATEGORIES = {
	    'nan': 'NaN', 'hertz': 'FREQUENCY', 'hz': 'FREQUENCY', 'pa': 'PRESSURE',
	    'pascal': 'PRESSURE', 'n/m²': 'PRESSURE', 'bar': 'PRESSURE',
	    '%': 'SCALARS_PERC', 'w': 'POWER', 'watt': 'POWER', 'kw': 'POWER',
	    'kg/m³': 'CHEMISTRY', 'm²/s': 'CHEMISTRY', 'pa*s': 'CHEMISTRY',
	    'v': 'ELECTRICAL', 'volt': 'ELECTRICAL', 'db': 'ACOUSTICS',
	    'db(a)': 'ACOUSTICS', 'k': 'TEMPERATURE', '°c': 'TEMPERATURE',
	    'n': 'MECHANICS', 'newton': 'MECHANICS', 'kg/s': 'FLOW', 'kg/h': 'FLOW',
	    'm³/s': 'FLOW', 'm³/h': 'FLOW', 'l/s': 'FLOW', 'l/h': 'FLOW',
	    'µm': 'LENGTH', 'mm': 'LENGTH', 'cm': 'LENGTH', 'dm': 'LENGTH',
	    'm': 'LENGTH', 'meter': 'LENGTH', 'm/s': 'SPEED', 'km/h': 'SPEED',
	    's^(-1)': 'FREQUENCY', '1/s': 'FREQUENCY', 's': 'TIME', 'h': 'TIME',
	    'min': 'TIME', 'd': 'TIME', 'hours': 'TIME', 'a': 'ELECTRICAL',
	    'm³': 'VOLUME', 'm²': 'AREA', 'rpm': 'FLOW', 'nm': 'MECHANICS',
	    'm/m': 'MECHANICS', 'm³/m²s': 'MECHANICS', 'w(m²*k)': 'HEAT_TRANSFER',
	    'kwh': 'ELECTRICAL', 'kg/(sm²)': 'FLOW', 'kg': 'MASS',
	    'w/(mk)': 'HEAT_TRANSFER', 'm²*k/w': 'HEAT_TRANSFER', 'j/s': 'POWER',
	}

	_METHOD_SEMANTIC_ID = 'Semantic equivalent , same semantic Id'
	_METHOD_WITH_METADATA = 'Semantically not equivalent, NLP with Metadata'
	_METHOD_WITHOUT_METADATA = 'Semantically not equivalent, NLP without Metadata'
	_ALGORITHM_NLP = 'Semantic search, k-nearest-neighbor with squared L2 distance (euclidean distance), with model gart-labor/eng-distilBERT-se-eclass'


	def _categorize(raw_value, categories):
	    """Return the category for a raw unit/datatype value, or 'NaN' if unknown.

	    The value is stringified first so that JSON ``null`` or a float NaN do
	    not crash the lookup; they simply fall into the 'NaN' category.
	    """
	    return categories.get(str(raw_value).lower(), 'NaN')


	def _has_hits(result):
	    """Return True when a query result holds at least one document."""
	    try:
	        return len(result['documents'][0]) > 0
	    except (KeyError, IndexError, TypeError):
	        return False


	def _query_or_none(collection, **query_kwargs):
	    """Run ``collection.query`` and return None if it fails or finds nothing."""
	    try:
	        result = collection.query(**query_kwargs)
	    except Exception:
	        # Deliberately best-effort, as in the original code: a failing
	        # filtered query (presumably Chroma's NoDatapointsException when the
	        # filter matches nothing) just means "no match on this path".
	        return None
	    return result if _has_hits(result) else None


	def query_right_aas(json_query, collection, metalabel, model):
	    """Find the entries of ``collection`` that best match a JSON query.

	    Matching is attempted in this order:

	    1. Entries whose ``SESemanticId`` metadata equals the query's semantic
	       id. The first such entry is returned with distance 0.
	    2. Otherwise a nearest-neighbour search on the concatenated
	       (definition, name) embedding, once filtered by ``Metalabel`` and once
	       unfiltered. The result set whose top hit has the smaller distance is
	       returned (the unfiltered one wins ties).

	    Args:
	        json_query: JSON string with the keys ``Name``, ``Definition``,
	            ``Unit``, ``Datatype``, ``SemanticId`` and ``ReturnMatches``.
	        collection: Object exposing a Chroma-style
	            ``query(query_embeddings=..., n_results=..., where=...)``.
	        metalabel: Mapping from metalabel key to a
	            ``(unit_category, datatype_category)`` tuple.
	        model: Object exposing ``encode(text)`` that returns a 1-D array.

	    Returns:
	        A list of result dicts. It holds at most ``ReturnMatches`` entries
	        and fewer when the collection returns fewer hits.

	    Raises:
	        KeyError: If a required key is missing from the query or from a
	            hit's metadata.
	    """
	    query = json.loads(json_query)
	    name = query['Name']
	    definition = query['Definition']
	    semantic_id = query['SemanticId']
	    return_matches = query['ReturnMatches']

	    category_pair = (
	        _categorize(query['Unit'], _UNIT_CATEGORIES),
	        _categorize(query['Datatype'], _DATATYPE_CATEGORIES),
	    )
	    # None when no metalabel is registered for this category pair; the
	    # metadata-filtered search is skipped in that case.
	    metadata = next(
	        (key for key, value in metalabel.items() if value == category_pair),
	        None,
	    )

	    name_embedding = model.encode(name)
	    definition_embedding = model.encode(definition)
	    # Order matters: definition first, then name.
	    queries = [
	        np.concatenate((definition_embedding, name_embedding), axis=0).tolist()
	    ]

	    # The query is a semantic k-nearest-neighbour search. Per the original
	    # author's notes, Chroma uses hnswlib (https://github.com/nmslib/hnswlib),
	    # which supports cosine, squared L2 or inner product as distance, and
	    # Chroma is configured with L2.

	    # Homogeneous case: look for entries with the same semantic id.
	    same_semantic_id = _query_or_none(
	        collection,
	        query_embeddings=queries,
	        n_results=1,
	        where={"SESemanticId": semantic_id},
	    )
	    if same_semantic_id is not None:
	        matched_object = json.loads(same_semantic_id['documents'][0][0])
	        top_metadata = same_semantic_id['metadatas'][0][0]
	        return [{
	            "matching_method": _METHOD_SEMANTIC_ID,
	            "matching_algorithm": 'None',
	            "matching_distance": 0,
	            "aas_id": top_metadata['AASId'],
	            "aas_id_short": top_metadata['AASIdShort'],
	            "submodel_id_short": top_metadata['SubmodelName'],
	            "submodel_id": top_metadata['SubmodelId'],
	            "matched_object": matched_object,
	        }]

	    # No matching semantic id: continue with NLP, with and without metadata.
	    with_metadata = None
	    if metadata is not None:
	        with_metadata = _query_or_none(
	            collection,
	            query_embeddings=queries,
	            n_results=return_matches,
	            where={"Metalabel": metadata},
	        )
	    without_metadata = collection.query(
	        query_embeddings=queries,
	        n_results=return_matches,
	    )

	    # Compare the top-hit distances of the filtered and unfiltered searches.
	    if with_metadata is None:
	        result, method = without_metadata, _METHOD_WITHOUT_METADATA
	    elif not _has_hits(without_metadata):
	        result, method = with_metadata, _METHOD_WITH_METADATA
	    elif without_metadata['distances'][0][0] <= with_metadata['distances'][0][0]:
	        result, method = without_metadata, _METHOD_WITHOUT_METADATA
	    else:
	        result, method = with_metadata, _METHOD_WITH_METADATA

	    if not _has_hits(result):
	        return []

	    # Build the final result from the hits actually returned, which may be
	    # fewer than the requested number of matches.
	    limit = max(return_matches, 0)
	    hits = zip(
	        result['documents'][0][:limit],
	        result['distances'][0],
	        result['metadatas'][0],
	    )
	    return [
	        {
	            "matching_method": method,
	            "matching_algorithm": _ALGORITHM_NLP,
	            "matching_distance": distance,
	            "submodel_id_short": hit_metadata['SubmodelName'],
	            "submodel_id": hit_metadata['SubmodelId'],
	            "matched_object": json.loads(document),
	        }
	        for document, distance, hit_metadata in hits
	    ]

	def get_right_collection(collections, aas_id):
	    """Return the collections that contain entries for the given AAS id.

	    Each collection is queried with ``get(where={'AASId': aas_id})``. It
	    counts as a match when the first returned metadata entry carries an
	    ``AASId`` key.

	    Args:
	        collections: Iterable of objects exposing a Chroma-style
	            ``get(where=...)`` method.
	        aas_id: AAS identifier to look for.

	    Returns:
	        A list of the matching collections, or the sentinel list
	        ``['AAS not in database']`` when none matches.
	    """
	    matching_collections = []
	    for collection in collections:
	        lookup = collection.get(where={'AASId': aas_id})
	        try:
	            found = 'AASId' in lookup['metadatas'][0]
	        except (KeyError, IndexError, TypeError):
	            # Missing, empty or None 'metadatas': this collection does not
	            # hold the requested AAS.
	            found = False
	        if found:
	            matching_collections.append(collection)

	    if not matching_collections:
	        return ['AAS not in database']
	    return matching_collections

	# Query one specific AAS
	def query_specific_aas(query, metalabel, model, collections, client_chroma):
	    """Run a matching query against the collection holding ``query['AASId']``.

	    The query dict is serialised to JSON, then the collection for its AAS id
	    is looked up via ``get_right_collection``. If none is found, the sentinel
	    list ``['AAS not in database']`` is returned unchanged. Otherwise the
	    first matching collection is re-fetched by name from ``client_chroma``
	    and passed to ``query_right_aas``, whose result is returned.
	    """
	    serialized_query = json.dumps(query, indent=4)
	    candidates = get_right_collection(collections, query['AASId'])

	    # Guard clause: nothing to search when the AAS is unknown.
	    if candidates == ['AAS not in database']:
	        return candidates

	    target_name = candidates[0].name
	    target_collection = client_chroma.get_collection(target_name)
	    return query_right_aas(serialized_query, target_collection, metalabel, model)