Spaces:
Runtime error
Runtime error
import spacy | |
from functools import lru_cache | |
from pydantic import BaseSettings, Field | |
from source.datamodel.common import CountryCode, LineOfBusiness | |
from source.datamodel.annotation_ranking import Weights, WeightCatalog | |
class Settings(BaseSettings):
    """Pydantic settings model with server parameters and model artefact paths.

    Each attribute is a settings field whose value defaults to the literal
    given here. ``PROPERTY_BE_UW_MODEL`` and ``PROPERTY_NL_UW_MODEL`` name
    their environment variable explicitly through ``Field(env=...)``.
    """

    # --- Server parameters -------------------------------------------------
    SERVER_HOST: str = '0.0.0.0'
    PORT: int = 3000
    # Explicitly annotated so these two fields no longer depend on pydantic's
    # implicit type inference for un-annotated class attributes.
    STOP_TIMEOUT: int = 120
    SLEEP_DURATION: float = 1e-4  # 0.1 ms sleep
    APP_NAME: str = "MIRA MODELS"

    # --- Model storage locations -------------------------------------------
    MIRA_MODELS_BLOB_PATH: str = "Mira/ml_models"
    LOCAL_MIRA_MODELS: str = "ml_models"
    MIRA_INTENT_MODEL: str = "ml_models/intent_classifier/2021-04-09"

    # --- NER / rule-based / UW model paths ---------------------------------
    MARINE_NL_NER_MODEL: str = "ml_models/ner_marine_nl/2021-04-09"
    MARINE_NL_RB_MODEL: str = "ml_models/ner_marine_nl/rule_based_annotator/rb_annotator.pkl"
    PROPERTY_NL_NER_MODEL: str = "ml_models/ner_property_nl/ner_v10"
    PROPERTY_BE_NER_MODEL: str = "ml_models/ner_property_be/ner_v10"
    PROPERTY_BE_UW_MODEL: str = Field(
        "ml_models/ner_property_be/uw_property_be_dev",
        env='PROPERTY_BE_UW_MODEL',
    )
    PROPERTY_NL_UW_MODEL: str = Field(
        "ml_models/ner_property_nl/uw_property_nl_dev",
        env='PROPERTY_NL_UW_MODEL',
    )

    # --- Address detection resources and limits ----------------------------
    ADDRESS_DETECTION_LAXONS: str = "ml_models/address_detection/laxons.json"
    ADDRESS_DETECTION_TERMS: str = "ml_models/address_detection/terms.json"
    ADDRESS_DETECTION_BROKER_ADDRESSES: str = "ml_models/address_detection/broker_addresses.json"

    # --- LayoutLM artefacts ------------------------------------------------
    LAYOUTLM_MODEL: str = "ml_models/layoutlm/layoutlm_model.pth"
    LAYOUTLM_LABEL_MAPPING: str = "ml_models/layoutlm/labels_mapping.json"
    LAYOUTLM_TOKENIZER: str = "ml_models/layoutlm/tokenizer"

    ADDRESS_DETECTION_MAX_LEN: int = 60
    ADDRESS_INDEX_MIN: int = 40
    DEEPPARSE_ROOT_DIR: str = "ml_models/deepparse"
    TSI_THRESHOLD: int = 100000

    # CRF broker model: artefact path plus integer *_POSITION values.
    # NOTE(review): the *_POSITION values presumably index feature columns
    # consumed by the CRF code - confirm against the caller.
    BROKER_MODEL: dict = {
        'CRF_BROKER_MODEL_PATH': r"source/services/ner_crf/model/crf/30_Nov_2023-14h-broker_pycrf.crfsuite",
        'WORD_POSITION': 1,
        # 'POS_POSITION': 2,  (entry disabled in the original)
        'LEMMA_POSITION': 2,
        # 'NER_POSITION': 3   (entry disabled in the original)
    }
# Settings for the SI CRF model: the serialized model path plus integer
# *_POSITION values.
# NOTE(review): the *_POSITION values presumably index feature columns used
# by the CRF code - confirm against the caller.
si_model: dict = {
    "CRF_SI_MODEL_PATH": r"ml_models/si/crf_23_Jun_2022-11h_inclu_lemma_n_amount_with_eur_gt10k_amount.joblib",
    "WORD_POSITION": 1,
    "LEMMA_POSITION": 2,
    "NER_POSITION": 3,
    "POS_POSITION": 4,
}
# spaCy 'nl_core_news_md' pipeline, loaded as soon as this statement runs
# (the load therefore happens at import time and needs the package installed).
# A load of the 'nl_core_news_sm' pipeline is left commented out:
# spacy_pretrained_model_nl_sm = spacy.load('nl_core_news_sm')
spacy_pretrained_model_nl_md = spacy.load("nl_core_news_md")
# Keyword options for the LayoutLM code: sequence length, model type and the
# bounding boxes assigned to the special CLS / SEP / PAD tokens.
layoutlm_config: dict = {
    "local_rank": -1,
    "overwrite_cache": True,
    "max_seq_length": 512,
    "model_type": "layoutlm",
    "cls_token_box": [0, 0, 0, 0],
    "sep_token_box": [1000, 1000, 1000, 1000],
    "pad_token_box": [0, 0, 0, 0],
}
def loss_ratio_params():
    """Return the URL, login and password for the claim-experience endpoint.

    Returns:
        tuple: ``(url, login, pw)`` as three strings, in that order.
    """
    # NOTE(review): the login and password are hard-coded in source; consider
    # moving them to configuration or a secret store.
    return (
        "http://0.0.0.0:3000/claim-experience-risk-level/",
        "clerk",
        "asdfgh",
    )
def get_weight_catalog():
    """Build a ``WeightCatalog`` populated with the property-line weights.

    Weights are registered for the ``POLICYHOLDER`` and ``BROKER`` entities,
    first for Belgium and then for the Netherlands; both countries receive
    the same values.

    Returns:
        WeightCatalog: the newly created, populated catalog.
    """
    # NOTE(review): the BROKER weights add up to 0.9 while the POLICYHOLDER
    # weights add up to 1.0 - confirm this is intended.
    entity_weights = (
        ('POLICYHOLDER', {'subject': 0.7, 'body': 0.2, 'attachment': 0.1}),
        ('BROKER', {'subject': 0.1, 'body': 0.6, 'attachment': 0.2}),
    )

    catalog = WeightCatalog()
    # Country-major order keeps the registration sequence BE then NL.
    for country in (CountryCode.belgium, CountryCode.netherlands):
        for entity, values in entity_weights:
            # A fresh Weights instance is created for every registration.
            catalog.set_weights(
                LineOfBusiness.property, country, entity, Weights(**values))
    return catalog
@lru_cache()
def get_settings():
    """Return the shared ``Settings`` instance.

    The result is cached, so ``Settings`` is built once and every later call
    returns that same object instead of constructing a new one.

    Returns:
        Settings: the cached settings object.
    """
    return Settings()


# Instantiate the settings
settings = get_settings()