# PoemTest / app_old.py
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEndpoint
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
import gradio as gr
############################## OLD CODE FOR 1 PDF ####################################
# Load and split documents
FILE_PATH = ""  # NOTE: left blank in the original; set this to your PDF before running
loader = PyPDFLoader(FILE_PATH)
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)
# Set up embeddings and vector store
HF_EMBED_MODEL_ID = "BAAI/bge-small-en-v1.5"
embeddings = HuggingFaceEmbeddings(model_name=HF_EMBED_MODEL_ID)
vectorstore = FAISS.from_documents(splits, embeddings)
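# Optional: the index above is rebuilt from scratch on every restart. A minimal
# sketch of persisting it with FAISS's save/load helpers (the "faiss_index"
# directory name is an arbitrary choice, not from the original):
#
# vectorstore.save_local("faiss_index")
# vectorstore = FAISS.load_local(
#     "faiss_index", embeddings, allow_dangerous_deserialization=True
# )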
############################## NEW CODE FOR 5 PDFs ####################################
# Load and split documents
# FILE_PATHS = ["vol1.pdf", "vol2.pdf"]
# # Initialize an empty list to store all documents
# all_docs = []
# # Create the splitter once; it is reused for every file
# text_splitter = RecursiveCharacterTextSplitter(
#     chunk_size=1000,
#     chunk_overlap=200,
# )
# # Iterate through each file path in FILE_PATHS
# for file_path in FILE_PATHS:
#     # Load the PDF using PyPDFLoader
#     loader = PyPDFLoader(file_path)
#     # Load and split the current document and append to all_docs
#     documents = loader.load_and_split(text_splitter)
#     all_docs.extend(documents)
# # Set up embeddings and vector store
# HF_EMBED_MODEL_ID = "BAAI/bge-small-en-v1.5"
# embeddings = HuggingFaceEmbeddings(model_name=HF_EMBED_MODEL_ID)
# vectorstore = FAISS.from_documents(all_docs, embeddings)
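# A sketch of discovering the PDFs at runtime instead of hardcoding FILE_PATHS
# (assumes the volumes sit next to this script; the "*.pdf" pattern is an
# assumption, not from the original):
#
# import glob
# FILE_PATHS = sorted(glob.glob("*.pdf"))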
# Set up LLM
HF_API_KEY = os.environ.get("HF_API_KEY") # Ensure this is set in Hugging Face Space settings
HF_LLM_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"
llm = HuggingFaceEndpoint(
    repo_id=HF_LLM_MODEL_ID,
    huggingfacehub_api_token=HF_API_KEY,
)
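# As written, the endpoint falls back to server-side generation defaults. A
# sketch of pinning them explicitly (the values below are illustrative, not
# from the original):
#
# llm = HuggingFaceEndpoint(
#     repo_id=HF_LLM_MODEL_ID,
#     huggingfacehub_api_token=HF_API_KEY,
#     max_new_tokens=512,  # cap the answer length
#     temperature=0.1,     # keep answers close to the retrieved context
# )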
# Create RAG pipeline
retriever = vectorstore.as_retriever()
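# as_retriever() defaults to similarity search over the top few chunks; a sketch
# of making the fetch count explicit and tunable (k=4 here is illustrative):
#
# retriever = vectorstore.as_retriever(search_kwargs={"k": 4})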
prompt = PromptTemplate.from_template(
    "Context information is below.\n"
    "---------------------\n"
    "{context}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge, answer the query.\n"
    "Query: {question}\n"
    "Answer:\n"
)
# The retriever's output (a list of Documents) is stringified into {context};
# the raw question passes through unchanged into {question}
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)
# Gradio interface
def ask_question(question):
    return rag_chain.invoke(question)

iface = gr.Interface(fn=ask_question, inputs="text", outputs="text")
iface.launch()
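# A sketch of a slightly friendlier interface (the title and labels are
# illustrative, not from the original):
#
# iface = gr.Interface(
#     fn=ask_question,
#     inputs=gr.Textbox(label="Question"),
#     outputs=gr.Textbox(label="Answer"),
#     title="PDF RAG Q&A",
# )
# iface.launch()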