File size: 1,251 Bytes
9e01274 ac599f3 9e01274 ac599f3 9e01274 ac599f3 9e01274 ac599f3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
def build_vector_index(csv_data: dict, persist_directory: str = "db"):
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
texts, metadatas, ids = [], [], []
for chave, nfe in csv_data.items():
head = nfe.get("head", {})
if head:
text = " | ".join(f"{k}: {v}" for k, v in head.items())
texts.append(text)
metadatas.append({"chave": chave, "type": "head"})
ids.append(f"{chave}-head")
for idx, item in enumerate(nfe.get("items", [])):
item_text = " | ".join(f"{k}: {v}" for k, v in item.items())
texts.append(item_text)
metadatas.append({"chave": chave, "type": "item", "item_idx": idx})
ids.append(f"{chave}-item-{idx}")
vectordb = Chroma.from_texts(
texts=texts,
embedding=embeddings,
metadatas=metadatas,
ids=ids,
persist_directory=persist_directory,
collection_name="csv_collection",
)
return vectordb
def query_vector_index(vectordb, question: str, k: int = 5):
results = vectordb.similarity_search(question, k=k)
return results
|