File size: 1,659 Bytes
564359b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_qdrant import Qdrant
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_google_genai import ChatGoogleGenerativeAI
from src.embeddings import get_embeddings
def get_pdf_text(pdf_docs):
    """Extract and concatenate plain text from a collection of PDF files.

    Args:
        pdf_docs: Iterable of PDF sources accepted by ``PyPDF2.PdfReader``
            (file paths or binary file-like objects, e.g. Streamlit uploads).

    Returns:
        A single string with the text of every page of every PDF, in order.
        Returns "" when ``pdf_docs`` is empty.
    """
    parts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() may return None for pages with no extractable
            # text (e.g. scanned images); guard so concatenation never
            # raises TypeError.
            parts.append(page.extract_text() or "")
    print("Extracted the text.......")
    # join() avoids quadratic string += accumulation over many pages.
    return "".join(parts)
def get_text_chunks(text, chunk_size, chunk_overlap):
    """Split *text* into overlapping chunks for embedding.

    Args:
        text: The full document text to split.
        chunk_size: Maximum characters per chunk.
        chunk_overlap: Characters of overlap between consecutive chunks.

    Returns:
        List of chunk strings produced by ``RecursiveCharacterTextSplitter``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    pieces = splitter.split_text(text)
    print("Chunking Done.......")
    return pieces
def get_vector_store(chunks, target_collection, url, api_key):
    """Embed *chunks* and upload them into a Qdrant collection.

    Args:
        chunks: List of text chunks to embed and index.
        target_collection: Name of the Qdrant collection to create/populate.
        url: Qdrant server URL.
        api_key: Qdrant API key.

    Returns:
        The populated ``Qdrant`` vector-store instance.
    """
    # Embeddings come from the project-local helper; HTTP (not gRPC) is
    # used with a generous timeout for large uploads.
    store = Qdrant.from_texts(
        chunks,
        embedding=get_embeddings(),
        url=url,
        api_key=api_key,
        prefer_grpc=False,
        collection_name=target_collection,
        timeout=75,
    )
    print("Vector store successfully created..........")
    print(f"vector store = {store}")
    return store
def get_conversational_chain(vector_store, google_api_key):
    """Build a conversational retrieval chain over *vector_store*.

    Args:
        vector_store: A vector store exposing ``as_retriever()``.
        google_api_key: API key for Google Generative AI.

    Returns:
        A ``ConversationalRetrievalChain`` wired to Gemini 1.5 Pro with
        buffered chat history.
    """
    model = ChatGoogleGenerativeAI(
        model="gemini-1.5-pro",
        google_api_key=google_api_key,
    )
    # Buffered memory keeps the full dialogue under the key the chain
    # expects ("chat_history"), returned as message objects.
    history = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=model,
        retriever=vector_store.as_retriever(),
        memory=history,
    )