File size: 2,129 Bytes
355fe19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.gemini import Gemini
import logging
import os

# Gemini API key comes from the environment; None when the variable is unset.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Root logging configuration at INFO, plus this module's own logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def load_data(data_path: str):
    """
    Load documents from a directory.

    Args:
        data_path (str): Path to the directory containing documents.

    Returns:
        The list of llama_index Document objects loaded from the
        directory, or False if loading fails (callers should test
        truthiness before using the result).
    """
    # NOTE: the original annotated the return as list[str], but
    # SimpleDirectoryReader.load_data() yields Document objects and the
    # failure path returns False, so the annotation was dropped.
    try:
        logger.info("Loading documents from %s", data_path)
        documents = SimpleDirectoryReader(data_path).load_data()
    except Exception as e:
        # Boundary-level catch: log message text matches the original,
        # but logger.exception also records the full traceback.
        logger.exception("Failed to load data: %s", e)
        return False
    logger.info("Successfully loaded %d documents", len(documents))
    return documents

def get_gemini_embedding(documents):
    """
    Create a query engine over *documents* using Gemini embeddings.

    Args:
        documents: Sequence of llama_index Document objects to index
            (as returned by ``load_data``).

    Returns:
        A configured llama_index query engine, or False if setup fails
        (callers should test truthiness before using the result).
    """
    # NOTE: the original annotated `documents: str`, but this function
    # receives the list of Document objects from load_data, so the wrong
    # annotation was removed.
    try:
        logger.info("Initializing Gemini embedding model and LLM")
        gemini_embedding_model = GeminiEmbedding(model_name="models/embedding-001")
        llm = Gemini(model="models/gemini-1.5-flash", api_key=GEMINI_API_KEY)

        # Configure global settings so downstream llama_index components
        # share the same LLM, embedding model, and chunking strategy.
        Settings.llm = llm
        Settings.embed_model = gemini_embedding_model
        Settings.node_parser = SentenceSplitter(chunk_size=1000, chunk_overlap=20)

        logger.info("Creating vector store index")
        index = VectorStoreIndex.from_documents(
            documents=documents,
            embed_model=gemini_embedding_model,
        )

        logger.info("Creating query engine")
        return index.as_query_engine()
    except Exception as e:
        # Failure contract preserved: log and return False rather than raise.
        logger.exception("Failed to setup Gemini embedding: %s", e)
        return False