import os

import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_huggingface import HuggingFaceEndpoint
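# Assumed dependencies for this Space; requirements.txt is expected to list roughly:
#   gradio, langchain, langchain-community, langchain-huggingface,
#   faiss-cpu, pypdf, sentence-transformers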

# Token for the Hugging Face Inference API (set as a Space secret / environment variable).
# Only re-export it if present, so a missing secret fails later with a clear auth error
# instead of a TypeError here.
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if hf_token:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

# Load embedding model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Initialize LLM
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=128,
    temperature=0.5,
)


# Step 1: Upload + process PDF
def process_pdf(pdf_file):
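    """Load the uploaded PDF, index it in FAISS, and build a conversational retrieval chain.

    Returns the new chain, a status message, and Gradio updates that enable the
    question box and send button.
    """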
    # gr.File may hand back a file path (str) or a tempfile-like object, depending on the Gradio version
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    loader = PyPDFLoader(pdf_path)
    pages = loader.load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(pages)

    # Build an in-memory FAISS index over the chunks
    vectordb = FAISS.from_documents(chunks, embeddings)

    # Fresh memory per upload so conversations don't leak between PDFs
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectordb.as_retriever(),
        memory=memory,
    )
    return qa_chain, "✅ PDF processed! You can now chat.", gr.update(interactive=True), gr.update(interactive=True)


# Step 2: Chatbot logic
def chat_fn(message, chat_history, qa_chain):
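    """Answer one question against the processed PDF and append the turn to the chat history."""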
    if qa_chain is None:
        return chat_history + [[message, "⚠️ Please upload a PDF first."]], chat_history, qa_chain
    answer = qa_chain.run(message)
    chat_history.append((message, answer))
    return chat_history, chat_history, qa_chain


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 📄 Chat with any PDF using Mistral 7B + LangChain")
    qa_chain_state = gr.State(None)
    chat_state = gr.State([])
    upload = gr.File(label="Upload a PDF", file_types=[".pdf"])
    status = gr.Textbox(label="Status", interactive=False)
    chatbot = gr.Chatbot(label="Chat")
    message = gr.Textbox(label="Ask a question", placeholder="Type your question...", interactive=False)
    send_btn = gr.Button("Send", interactive=False)

    upload.change(fn=process_pdf, inputs=upload, outputs=[qa_chain_state, status, message, send_btn])
    send_btn.click(fn=chat_fn, inputs=[message, chat_state, qa_chain_state], outputs=[chatbot, chat_state, qa_chain_state])

demo.launch(share=True)
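# Note: share=True creates a temporary public Gradio link; on Hugging Face Spaces the app
# is already hosted publicly, so demo.launch() without share would typically be enough.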