import os
import torch
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory


# Check for GPU availability and set the appropriate device for computation.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# Global variables
conversation_retrieval_chain = None
llm_hub = None
embeddings = None

# Function to initialize the language model and its embeddings
def init_llm():
    global llm_hub, embeddings

    # Hugging Face API token
    # Setup environment variable HUGGINGFACEHUB_API_TOKEN
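    # e.g. (hypothetical placeholder -- substitute your own token):
    #   os.environ["HUGGINGFACEHUB_API_TOKEN"] = "hf_..."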

    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

    llm_hub = HuggingFaceEndpoint(
        repo_id=model_id,
        task="text-generation",
        max_new_tokens=200,       # cap the length of each generated answer
        do_sample=False,          # greedy decoding; temperature only matters when do_sample=True
        repetition_penalty=1.03,
        return_full_text=False,   # return only the completion, not the prompt
        temperature=0.1,
    )

    # Embedding model for vectorizing document chunks (runs on DEVICE)
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": DEVICE}
    )

# In-memory store of per-session chat histories, keyed by session_id
store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    # Return the chat history for a session, creating it on first use
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


# Function to process a PDF document
def process_document(document_path):
    global conversation_retrieval_chain

    # Load the document
    loader = PyPDFLoader(document_path)
    documents = loader.load()
    
    # Split the document into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128)
    texts = text_splitter.split_documents(documents)
    
    # Create an embeddings database using FAISS from the split text chunks.
    db = FAISS.from_documents(documents=texts, embedding=embeddings)

    # Prompt for the answer-generation step; {context} receives the retrieved
    # chunks, and the chat_history placeholder lets the model see prior turns.
    # ChatPromptTemplate supplies the role structure, so no raw Llama
    # chat-template tokens are needed here.
    system_prompt = (
        "You are an assistant for answering questions using provided context. "
        "You are given the extracted parts of a long document, the previous "
        "chat history, and a question. Provide a conversational answer. "
        'If you don\'t know the answer, just say "I do not know." '
        "Don't make up an answer.\n\n"
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    
    # Top-k similarity retriever over the FAISS index (lambda_mult only
    # affects MMR search, so it is omitted here)
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    question_answer_chain = create_stuff_documents_chain(llm_hub, prompt)

    contextualize_q_system_prompt = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, "
        "formulate a standalone question which can be understood "
        "without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    history_aware_retriever = create_history_aware_retriever(
        llm_hub, retriever, contextualize_q_prompt
    )

    # Chain retrieval and answer generation, then wrap with message history so
    # each session's prior turns are injected as chat_history on every call
    rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

    conversation_retrieval_chain = RunnableWithMessageHistory(
        rag_chain,
        get_session_history,
        input_messages_key="input",
        history_messages_key="chat_history",
        output_messages_key="answer",
    )


# Function to process a user prompt
def process_prompt(prompt):
    # Query the chain; RunnableWithMessageHistory looks up (or creates) the
    # "abc123" entry in `store` and injects it as chat_history
    output = conversation_retrieval_chain.invoke(
        {"input": prompt},
        config={"configurable": {"session_id": "abc123"}},
    )
    answer = output["answer"]
    
    # Return the model's response
    return answer

# Initialize the language model
init_llm()
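

# A minimal usage sketch, not part of the module's public surface:
# "sample.pdf" is a hypothetical path, and HUGGINGFACEHUB_API_TOKEN must
# already be set in the environment for the endpoint calls to succeed.
if __name__ == "__main__":
    assert os.environ.get("HUGGINGFACEHUB_API_TOKEN"), "set HUGGINGFACEHUB_API_TOKEN first"
    process_document("sample.pdf")  # build the FAISS index and the RAG chain
    print(process_prompt("What is this document about?"))
    print(process_prompt("Summarize that in one sentence."))  # follow-up relies on stored history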