import streamlit as st
import os
from huggingface_hub import InferenceClient
from langchain_community.document_loaders import PDFPlumberLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.prompts import ChatPromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings

# Set up Hugging Face model and token
model = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # You can change to a model of your choice from Hugging Face
access_token = os.getenv("HF_TOKEN")  # Your Hugging Face API token
client = InferenceClient(model=model, token=access_token)

# Template for response generation
template = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer:
"""

# Directory to store uploaded PDFs
pdfs_directory = '../pdfs'
os.makedirs(pdfs_directory, exist_ok=True)

# Initialize the embedding model
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")  # You can choose any model from Hugging Face

# Initialize the vector store for document indexing
vector_store = InMemoryVectorStore(embedding=embedding)

# Function to save an uploaded PDF file to disk
def upload_pdf(file):
    with open(os.path.join(pdfs_directory, file.name), "wb") as f:
        f.write(file.getbuffer())

# Function to load PDF content
def load_pdf(file_path):
    loader = PDFPlumberLoader(file_path)
    documents = loader.load()
    return documents

# Function to split text into manageable chunks
def split_text(documents):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True
    )
    return text_splitter.split_documents(documents)

# Function to index documents in the vector store
def index_docs(documents):
    vector_store.add_documents(documents)

# Function to retrieve relevant documents based on a query
def retrieve_docs(query):
    return vector_store.similarity_search(query)

# Function to generate an answer from the retrieved documents using text generation
def answer_question(question, documents):
    context = "\n\n".join([doc.page_content for doc in documents])
    prompt = ChatPromptTemplate.from_template(template)

    # Format the prompt with the user's question and context
    question_with_context = prompt.format(question=question, context=context)

    # Use the Hugging Face InferenceClient's text_generation method
    generate_kwargs = {
        "temperature": 0.7,      # Control the creativity of the generated response
        "max_new_tokens": 150,   # Limit the length of the output
        "top_p": 0.9             # Control diversity via nucleus sampling
    }

    # Generate the response using the text generation method
    response = client.text_generation(question_with_context, **generate_kwargs)

    # Print the response to inspect its structure
    print(f"Response: {response}")

    # If the response is a string, we can return it directly
    if isinstance(response, str):
        return response
    else:
        # If it's a dictionary, extract the generated text
        return response.get("generated_text", "No answer generated.")

# Streamlit file uploader for PDFs
uploaded_file = st.file_uploader(
    "Upload PDF",
    type="pdf",
    accept_multiple_files=False
)

if uploaded_file:
    # Upload, load, split, and index the document
    upload_pdf(uploaded_file)
    documents = load_pdf(os.path.join(pdfs_directory, uploaded_file.name))
    chunked_documents = split_text(documents)
    index_docs(chunked_documents)

    # User input for a question
    question = st.chat_input()

    if question:
        st.chat_message("user").write(question)
        related_documents = retrieve_docs(question)
        answer = answer_question(question, related_documents)
        st.chat_message("assistant").write(answer)