# genai/retrieve/vector_store.py
import datetime
import json

from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.schema import BaseNode

from generator.llm_calls import get_embed_model
from utils.db import postgres_db


async def create_embeddings_from_file(file_path: str):
    """Read a file from disk, split it into sentence-window nodes and embed each node.

    SimpleDirectoryReader works on file paths, so uploaded files should be saved
    locally before being passed in here.
    """
    documents = await SimpleDirectoryReader(input_files=[file_path]).aload_data()
node_parser = SentenceWindowNodeParser.from_defaults(
window_size=3,
window_metadata_key="window",
original_text_metadata_key="original_text",
)
    nodes = node_parser.get_nodes_from_documents(documents)
    # Embed each node's text and keep the vector on the node for insertion below.
    for node in nodes:
        node.embedding = await get_embed_model().aget_text_embedding(node.get_content())
    return nodes
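
# Note: SentenceWindowNodeParser keeps each node's surrounding sentences in
# node.metadata["window"] (the window_metadata_key configured above); only the original
# sentence is embedded, while get_relevant_document() below returns the stored window.
# Illustrative, made-up example of one node:
#   node.get_content()      -> "The cat sat on the mat."
#   node.metadata["window"] -> "It was raining. The cat sat on the mat. The dog barked."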


def get_values_from_nodes(nodes: list[BaseNode]):
    """Build rows for document_embedding: (embedding, text, metadata JSON, created_at)."""
    # Depending on the database driver, node.embedding (a list of floats) may need to be
    # converted to a string or registered via pgvector's adapter before insertion.
    created_at = datetime.datetime.now()
    values = []
    for node in nodes:
        values.append((node.embedding, node.get_content(), json.dumps(node.metadata), created_at))
    return values


async def insert_documents(nodes):
    values = get_values_from_nodes(nodes)
    # postgres_db.db_pool is assumed to yield an async connection when entered as a context manager.
    async with postgres_db.db_pool as conn:
        async with conn.cursor() as cur:
            try:
                await cur.executemany(
                    """
                    INSERT INTO document_embedding (embedding, document, metadata, created_at)
                    VALUES (%s, %s, %s, %s);
                    """,
                    values,
                )
                await conn.commit()
            except Exception as error:
                # Roll back while the connection is still open instead of after the block exits.
                print(f"insert document exception {error}")
                await conn.rollback()
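

# The INSERT above targets a pgvector-style table whose schema is not defined in this file.
# The helper below is only a hypothetical sketch reconstructed from the column names used in
# the queries here; ensure_document_embedding_table() and the default dimension of 1536 are
# assumptions (the dimension must match the model returned by get_embed_model()).
async def ensure_document_embedding_table(embedding_dim: int = 1536):
    async with postgres_db.db_pool as conn:
        async with conn.cursor() as cur:
            await cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
            await cur.execute(f"""
                CREATE TABLE IF NOT EXISTS document_embedding (
                    id BIGSERIAL PRIMARY KEY,
                    embedding VECTOR({embedding_dim}),
                    document TEXT,
                    metadata JSONB,
                    created_at TIMESTAMP
                );
            """)
            await conn.commit()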


async def get_relevant_document(query: str):
    embedded_question = await get_embed_model().aget_query_embedding(query=query)
    async with postgres_db.db_pool as conn:
        async with conn.cursor() as cur:
            try:
                # Pass the query embedding as a bound parameter (cast to vector) instead of
                # interpolating it into the SQL string, and return the stored sentence windows.
                await cur.execute(
                    """
                    SELECT metadata -> 'window', 1 - (embedding <=> %s::vector) AS cosine_similarity
                    FROM document_embedding
                    ORDER BY cosine_similarity DESC
                    LIMIT 10;
                    """,
                    (str(embedded_question),),
                )
                results = await cur.fetchall()
                return [row[0] for row in results]
            except Exception as error:
                print(f"get relevant document exception {error}")
                return []
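

# Hypothetical usage sketch: the FastAPI app and routes below are not part of this module and
# only illustrate how the helpers above could be wired together. The uploaded file is written
# to a temporary path first because SimpleDirectoryReader reads from disk.
#
#     import tempfile
#     from pathlib import Path
#
#     from fastapi import FastAPI, UploadFile
#
#     app = FastAPI()
#
#     @app.post("/ingest")
#     async def ingest(file: UploadFile):
#         with tempfile.TemporaryDirectory() as tmp_dir:
#             tmp_path = Path(tmp_dir) / (file.filename or "upload")
#             tmp_path.write_bytes(await file.read())
#             nodes = await create_embeddings_from_file(str(tmp_path))
#             await insert_documents(nodes)
#         return {"inserted": len(nodes)}
#
#     @app.get("/search")
#     async def search(query: str):
#         return {"documents": await get_relevant_document(query)}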