"""Helpers for loading documents and building a Gemini-backed LlamaIndex query engine."""
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings | |
from llama_index.core.node_parser import SentenceSplitter | |
from llama_index.embeddings.gemini import GeminiEmbedding | |
from llama_index.llms.gemini import Gemini | |
import logging | |
import os | |
# Gemini API key taken from the environment; None when the variable is unset.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# Logging setup: INFO-level root configuration plus a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def load_data(data_path: str) -> "list | bool":
    """
    Load documents from a directory.

    Args:
        data_path (str): Path to the directory containing documents

    Returns:
        list | bool: The document objects produced by
        ``SimpleDirectoryReader(data_path).load_data()``, or ``False`` if
        loading fails. Callers must check for ``False`` before using the
        result.
    """
    logger.info("Loading documents from %s", data_path)
    try:
        documents = SimpleDirectoryReader(data_path).load_data()
    except Exception as e:
        # Best-effort contract: failures are reported to the caller as False
        # rather than raised. logger.exception keeps the traceback so the
        # root cause is not lost.
        logger.exception("Failed to load data: %s", e)
        return False
    else:
        logger.info("Successfully loaded %d documents", len(documents))
        return documents
def get_gemini_embedding(documents: list):
    """
    Create a query engine using Gemini embeddings.

    Installs the Gemini LLM, the Gemini embedding model and a sentence
    splitter (chunk_size=1000, chunk_overlap=20) on the global ``Settings``
    object, builds a ``VectorStoreIndex`` from ``documents`` and returns the
    index's default query engine.

    Args:
        documents (list): Documents to index

    Returns:
        QueryEngine: Configured query engine or False if setup fails
    """
    # A failed load is signalled with False/None; reject it up front instead
    # of constructing model clients and failing later with an opaque error.
    if documents is None or documents is False:
        logger.error("Failed to setup Gemini embedding: no documents were provided")
        return False

    try:
        logger.info("Initializing Gemini embedding model and LLM")
        # Pass the same key to both clients so the embedding model does not
        # depend on a different credential source than the LLM.
        gemini_embedding_model = GeminiEmbedding(
            model_name="models/embedding-001",
            api_key=GEMINI_API_KEY,
        )
        llm = Gemini(model="models/gemini-1.5-flash", api_key=GEMINI_API_KEY)

        # Configure global settings
        Settings.llm = llm
        Settings.embed_model = gemini_embedding_model
        Settings.node_parser = SentenceSplitter(chunk_size=1000, chunk_overlap=20)

        logger.info("Creating vector store index")
        index = VectorStoreIndex.from_documents(
            documents=documents,
            embed_model=gemini_embedding_model,
        )

        logger.info("Creating query engine")
        return index.as_query_engine()
    except Exception as e:
        # Best-effort contract: report failure as False, but keep the
        # traceback in the log for diagnosis.
        logger.exception("Failed to setup Gemini embedding: %s", e)
        return False