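# NOTE(review): "Spaces: Sleeping / Sleeping" -- appears to be page-header
# residue from a web capture of this file, not module content; confirm and drop.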
import logging
from typing import Dict, Any

from elasticsearch import Elasticsearch, exceptions
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# NOTE(review): this configures the root logger at DEBUG as an import-time
# side effect. Kept as-is because importers may rely on it; consider moving
# it to the application entry point.
logging.basicConfig(level=logging.DEBUG)

# Dimension count declared for the "embedding" dense_vector field below.
embedding_dimension = 1536
def create_mapping(
    properties: Dict[str, Any],
    *,
    number_of_shards: int = 1,
    number_of_replicas: int = 1,
) -> Dict[str, Any]:
    """Build an index-creation body from a set of field definitions.

    Args:
        properties: Mapping of field name to field definition. It is placed
            under ``mappings.properties`` as-is (the dict is not copied).
        number_of_shards: Value written to ``settings.number_of_shards``.
        number_of_replicas: Value written to ``settings.number_of_replicas``.

    Returns:
        A dict with a ``settings`` key and a ``mappings`` key.
    """
    return {
        "settings": {
            "number_of_shards": number_of_shards,
            "number_of_replicas": number_of_replicas,
        },
        "mappings": {"properties": properties},
    }
def retrieval_index() -> Dict[str, Any]:
    """Return the index-creation body used for retrieval indices.

    The field definitions are passed through ``create_mapping``, so the
    result carries both ``settings`` and ``mappings``. The ``embedding``
    field's ``dims`` is read from the module-level ``embedding_dimension``
    each time this function is called.

    Returns:
        The body dict produced by ``create_mapping`` for the retrieval fields.
    """
    properties: Dict[str, Any] = {
        "chunk_id": {"type": "keyword"},
        "chunk": {"type": "text"},
        "embedding": {
            "type": "dense_vector",
            "dims": embedding_dimension,
        },
        "certification": {"type": "keyword"},
        "source_file": {"type": "keyword"},
        "timestamp": {"type": "date"},
    }
    return create_mapping(properties)
def create_elasticsearch_index(es_client: Elasticsearch, index_name: str) -> bool:
    """
    Create an Elasticsearch index with the retrieval mapping.

    An index that already exists is left untouched and reported as success.
    Any error is logged and reported through the return value; this function
    does not raise.

    Args:
        es_client (Elasticsearch): The Elasticsearch client instance.
        index_name (str): The name of the index to create.

    Returns:
        bool: True if the index exists when this call finishes (created now
        or already present), False if checking or creating it failed.
    """
    try:
        if es_client.indices.exists(index=index_name):
            logger.warning("Index '%s' already exists. Skipping creation.", index_name)
            return True
        # Build the mapping only once we know it is needed.
        es_client.indices.create(index=index_name, body=retrieval_index())
    except exceptions.RequestError as err:
        # Another process can create the index between the exists() check and
        # create(); the server then rejects the request because the index
        # already exists. That is the same outcome as the early return above.
        if getattr(err, "error", None) == "resource_already_exists_exception":
            logger.warning("Index '%s' already exists. Skipping creation.", index_name)
            return True
        logger.exception(
            "Unexpected error while creating index '%s': %s", index_name, err
        )
        return False
    except Exception as err:
        # Boundary handler: callers rely on a bool result rather than an
        # exception, so log with the traceback and report failure.
        logger.exception(
            "Unexpected error while creating index '%s': %s", index_name, err
        )
        return False
    logger.info("Index '%s' created successfully.", index_name)
    return True