Spaces:
Sleeping
Sleeping
File size: 1,003 Bytes
e394a78 1f89808 e394a78 1f89808 e394a78 1f89808 e394a78 1f89808 5ae483b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# * Helpers for the RAG (retrieval-augmented generation) pipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
import os
from dotenv import load_dotenv
load_dotenv()
# Copy OPENROUTE_API_KEY into OPENAI_API_KEY only when it is actually set.
# os.environ accepts string values only, so assigning the None returned by
# os.getenv() for a missing variable would raise TypeError at import time.
_openroute_api_key = os.getenv('OPENROUTE_API_KEY')
if _openroute_api_key is not None:
    os.environ['OPENAI_API_KEY'] = _openroute_api_key
def dataIngestion(document):
    """Load a PDF and return the documents produced by the loader.

    Args:
        document: Path (or other locator accepted by ``PyPDFLoader``) of the
            PDF to ingest.

    Returns:
        Whatever ``PyPDFLoader(document).load()`` returns.
    """
    return PyPDFLoader(document).load()
def transform(ingested_docs, *, chunk_size=1000, chunk_overlap=200):
    """Split loaded documents into overlapping chunks.

    Args:
        ingested_docs: Documents to split, e.g. the output of
            ``dataIngestion``.
        chunk_size: Maximum chunk size passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 1000, the value
            that was previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks passed to the
            splitter. Defaults to 200, the value that was previously
            hard-coded.

    Returns:
        The chunked documents returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(ingested_docs)
def vectorStoreAndEmbeddings(
    docs,
    query,
    model_name="sentence-transformers/all-MiniLM-L6-v2",
):
    """Embed ``docs``, index them in a Chroma store, and search it for ``query``.

    A new embedding model instance and a new Chroma store are created on every
    call.

    Args:
        docs: Documents to index, e.g. the output of ``transform``.
        query: Text to search the indexed documents for.
        model_name: Name of the HuggingFace embedding model. Defaults to the
            model that was previously hard-coded.

    Returns:
        The result of ``Chroma.similarity_search(query)``, or an empty list
        when ``docs`` is empty.
    """
    # Nothing to index means nothing can match; return early instead of
    # loading the embedding model and building a store from an empty corpus.
    if not docs:
        return []

    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    db = Chroma.from_documents(documents=docs, embedding=embeddings)
    return db.similarity_search(query)