Spaces:

dev-jas
/

CodeMind

Running

CodeMind / src /embedder.py

devjas1

feat: Initial commit from recovered state

80b95e8 about 1 month ago

993 Bytes

	"""
	This script handles document embedding using EmbeddingGemma.
	This is the entry point for indexing documents.
	TODO: Wire this to FAISS
	"""

	import os
	from sentence_transformers import SentenceTransformer


	def embed_documents(path: str, config: dict):
	    """Embed every regular file found directly inside ``path``.

	    The model is loaded once from ``config["embedding"]["model_path"]``.
	    Each file is read as UTF-8 text and handed to ``model.encode``.

	    Args:
	        path: Directory whose immediate entries are embedded (no recursion).
	        config: Configuration mapping that provides
	            ``config["embedding"]["model_path"]``.

	    Returns:
	        A list of ``(filename, embedding)`` tuples ordered by filename, where
	        ``embedding`` is whatever ``model.encode`` returned for that file.

	    Raises:
	        Exception: Any error raised while loading the model (including a
	            missing config key) is printed and then re-raised.
	        OSError: If ``path`` itself cannot be listed.
	    """
	    try:
	        model = SentenceTransformer(config["embedding"]["model_path"])
	    except Exception as e:
	        # Report and propagate: nothing below can run without a model, and
	        # retrying the same load would only fail a second time.
	        print(f"Error loading model: {str(e)}")
	        raise

	    embeddings = []

	    # Sorted so the result order does not depend on the filesystem.
	    for fname in sorted(os.listdir(path)):
	        full_path = os.path.join(path, fname)

	        # os.listdir also yields sub-directories, which cannot be read as text.
	        if not os.path.isfile(full_path):
	            continue

	        try:
	            with open(full_path, "r", encoding="utf-8") as f:
	                text = f.read()
	        except (OSError, UnicodeDecodeError) as e:
	            # One unreadable or non-UTF-8 file should not abort the whole run.
	            print(f"Skipping {fname}: {e}")
	            continue

	        emb = model.encode(text)
	        if emb is not None:
	            embeddings.append((fname, emb))
	        else:
	            print(f"Embedding failed for {fname}.")

	    print(f"Total embeddings created: {len(embeddings)}")
	    return embeddings

	# TODO: Save embeddings to disk or vector store