# Repository viewer header (not code): bibliotecadebabel, "mxbai endpoint", b1179cf, 1.45 kB
import src.constants.credentials as cred
import os
# Settings dictionary for the "mxbai" / "msc_direct" service configuration.
# The code that consumes this mapping is not visible in this file, so the
# comments below describe only what the literal itself contains.
service_mxbai_msc_direct_config = {
    # Where and how the input data is read.
    "reader_config": {
        # Read from the environment at import time; a missing
        # `msc_direct_s3_path` variable raises KeyError on import.
        "input_path": os.environ["msc_direct_s3_path"],
        "credentials": cred.credentials_backblaze,
        "format": "parquet",
    },
    "sample_size": 32,
    # Model identifiers (presumably Hugging Face model ids -- confirm with
    # the loader that consumes them).
    "sentence_transformer_name": "mixedbread-ai/mxbai-embed-large-v1",
    "cross_encoder_name": "mixedbread-ai/mxbai-rerank-large-v1",
    "batch_size": 4,
    "dataset_size": 32,
    "seq_len": 256,
    "top_k": 50,
    "semantic_column_names": [
        "name",
        "price",
        "brand",
        "keyword",
        "description",
        "specifications",
    ],
    "programmatic_search_config": {
        # NOTE(review): `max_value` is the string "10000" while `min_value`
        # is the integer 0 -- confirm the consumer expects this mix of types.
        "scalar_columns": [
            {"column_name": "price", "min_value": 0, "max_value": "10000"},
        ],
        "discrete_columns": [
            {"column_name": "brand", "default_values": []},
        ],
        "columns_to_drop": ["similarities", "embeddings", "index"],
    },
}