"""FastAPI application exposing the "Interface Semantic Matching" endpoints.

The service offers three POST endpoints:

* ``/PostAssetAdministrationShellEmbeddings`` hands an uploaded JSON file to
  ``index_corpus``.
* ``/GetSubmodelElementsFromDifferentAASBySemanticIdAndSemanticInformation``
  forwards a query to ``ask_database``.
* ``/GetSubmodelElementsFromSpecificAASBySemanticIdAndSemanticInformation``
  forwards a query to ``query_specific_aas``.

The sentence-embedding model, the pickled metadata and the Chroma client are
created once at application startup and shared through module-level globals.
"""

import json
import os
import pickle
import string
import sys
import time
from typing import Any, AnyStr, Dict, List, Union

import category_encoders as ce
import chromadb
import numpy as np
import pandas as pd
import tqdm.autonotebook
from chromadb.config import Settings
from fastapi import FastAPI, File, HTTPException, Request, UploadFile
from joblib import dump, load
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer, util

# from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

from database_build import index_corpus
from predict_different_aas import ask_database
from predict_one_aas import query_specific_aas

app = FastAPI(title="Interface Semantic Matching")

JSONObject = Dict[AnyStr, Any]
JSONArray = List[Any]
JSONStructure = Union[JSONArray, JSONObject]

# Location of the pickled metadata loaded at startup.
METADATA_PATH = "app/metadata.pickle"

# The Chroma server address used to be hard-coded and had to be edited after
# every AWS restart. It can now be overridden through the environment; the
# defaults are the previously hard-coded values.
DEFAULT_CHROMA_HOST = "3.67.86.197"
DEFAULT_CHROMA_PORT = "8000"


class submodelElement(BaseModel):
    """Request model bundling the fields of a submodel element query.

    NOTE(review): no endpoint visible in this module uses this model; the
    endpoints below take the same fields as individual parameters instead.
    """

    datatype: str
    definition: str
    name: str
    semantic_id: str
    unit: str
    return_matches: int
    aas_id: str
    number_aas_returned: int


@app.on_event("startup")
def load_hf_model():
    """Create the shared model, metadata and Chroma client at startup.

    Sets the module-level globals ``model``, ``metalabel`` and
    ``client_chroma`` that the endpoints below read.

    The Chroma server address is taken from the ``CHROMA_SERVER_HOST`` and
    ``CHROMA_SERVER_HTTP_PORT`` environment variables when they are set.

    Raises:
        OSError: if the metadata file cannot be opened.
        ValueError: if ``CHROMA_SERVER_HTTP_PORT`` is set to a non-integer.
    """
    global model, metalabel, client_chroma

    # Fine-tuned model. The previously used model was
    # 'mboth/distil-eng-quora-sentence'.
    model = SentenceTransformer("gart-labor/eng-distilBERT-se-eclass")

    # Translation model, currently disabled:
    # global model_translate
    # model_translate = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
    # global tokenizer_translate
    # tokenizer_translate = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")

    # SECURITY: pickle.load executes arbitrary code from the file. This is
    # only acceptable because the file is a local artifact shipped with the
    # application; never point METADATA_PATH at untrusted data.
    with open(METADATA_PATH, "rb") as handle:
        metalabel = pickle.load(handle)

    chroma_host = os.environ.get("CHROMA_SERVER_HOST", DEFAULT_CHROMA_HOST)
    chroma_port = int(
        os.environ.get("CHROMA_SERVER_HTTP_PORT", DEFAULT_CHROMA_PORT)
    )
    client_chroma = chromadb.Client(
        Settings(
            chroma_api_impl="rest",
            chroma_server_host=chroma_host,
            chroma_server_http_port=chroma_port,
        )
    )


@app.post("/PostAssetAdministrationShellEmbeddings")
async def index_aas(aas: UploadFile = File(...)):
    """Parse an uploaded JSON file and pass it to ``index_corpus``.

    Args:
        aas: The uploaded file; its content must be a JSON document.

    Returns:
        The string ``'AAS ready'`` once ``index_corpus`` has returned.

    Raises:
        HTTPException: with status 400 if the upload is not valid JSON.
    """
    try:
        data = json.load(aas.file)
    except ValueError as err:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors;
        # report a client error instead of an unhandled 500.
        raise HTTPException(
            status_code=400,
            detail=f"Uploaded file is not valid JSON: {err}",
        ) from err

    # NOTE(review): index_corpus is called synchronously inside an async
    # endpoint, so the event loop is blocked while it runs. Consider
    # offloading it to a worker thread if indexing turns out to be slow.
    index_corpus(data, model, metalabel, client_chroma)
    return 'AAS ready'


@app.post("/GetSubmodelElementsFromDifferentAASBySemanticIdAndSemanticInformation")
def predict_different_aas(
    name: str,
    definition: str,
    number_aas_returned: Union[int, None] = 1,
    semantic_id: Union[str, None] = "NaN",
    unit: Union[str, None] = "NaN",
    datatype: Union[str, None] = "NaN",
):
    """Query all Chroma collections through ``ask_database``.

    Args:
        name: Stored under the ``"Name"`` key of the query.
        definition: Stored under the ``"Definition"`` key of the query.
        number_aas_returned: Stored under ``"NumberAASReturned"``.
        semantic_id: Stored under ``"SemanticId"``; the string ``"NaN"`` is
            used when omitted.
        unit: Stored under ``"Unit"``; ``"NaN"`` when omitted.
        datatype: Stored under ``"Datatype"``; ``"NaN"`` when omitted.

    Returns:
        Whatever ``ask_database`` returns for the assembled query.
    """
    collections = client_chroma.list_collections()
    query = {
        "Name": name,
        "Definition": definition,
        "Unit": unit,
        "Datatype": datatype,
        "SemanticId": semantic_id,
        "NumberAASReturned": number_aas_returned,
    }
    return ask_database(query, metalabel, model, collections, client_chroma)


@app.post("/GetSubmodelElementsFromSpecificAASBySemanticIdAndSemanticInformation")
def predict_specific_aas(
    name: str,
    definition: str,
    aas_id: str,
    return_matches: Union[int, None] = 2,
    semantic_id: Union[str, None] = "NaN",
    unit: Union[str, None] = "NaN",
    datatype: Union[str, None] = "NaN",
):
    """Query one AAS through ``query_specific_aas``.

    Args:
        name: Stored under the ``"Name"`` key of the query.
        definition: Stored under the ``"Definition"`` key of the query.
        aas_id: Stored under ``"AASId"``.
        return_matches: Stored under ``"ReturnMatches"``.
        semantic_id: Stored under ``"SemanticId"``; the string ``"NaN"`` is
            used when omitted.
        unit: Stored under ``"Unit"``; ``"NaN"`` when omitted.
        datatype: Stored under ``"Datatype"``; ``"NaN"`` when omitted.

    Returns:
        Whatever ``query_specific_aas`` returns for the assembled query.
    """
    collections = client_chroma.list_collections()
    query = {
        "Name": name,
        "Definition": definition,
        "Unit": unit,
        "Datatype": datatype,
        "SemanticId": semantic_id,
        "ReturnMatches": return_matches,
        "AASId": aas_id,
    }
    return query_specific_aas(query, metalabel, model, collections, client_chroma)