""" | |
This script handles document embedding using EmbeddingGemma. | |
This is the entry point for indexing documents. | |
TODO: Wire this to FAISS | |
""" | |
import os | |
from sentence_transformers import SentenceTransformer | |
def embed_documents(path: str, config: dict): | |
try: | |
model = SentenceTransformer(config["embedding"]["model_path"]) | |
except Exception as e: | |
print(f"Error loading model: {str(e)}") | |
model = SentenceTransformer(config["embedding"]["model_path"]) | |
embeddings = [] | |
for fname in os.listdir(path): | |
with open(os.path.join(path, fname), "r", encoding="utf-8") as f: | |
text = f.read() | |
emb = model.encode(text) | |
if emb is not None: | |
embeddings.append((fname, emb)) | |
else: | |
print(f"Embedding failed for {fname}.") | |
print(f"Total embeddings created: {len(embeddings)}") | |
return embeddings | |
# TODO: Save embeddings to disk or vector store | |