import os

import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import login
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema import AIMessage, HumanMessage
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.vectorstores import Chroma
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

# Read the Hugging Face token. The error message below refers to a .env file,
# so load it into the environment first.
load_dotenv()
hf_token = os.getenv("SP_7")
if hf_token is None:
    raise ValueError(
        "Hugging Face token 'SP_7' is not set in the environment variables. "
        "Ensure it is defined in the .env file."
    )

# Log in to Hugging Face
login(token=hf_token)

# Load the pre-existing vector store. Chroma needs an embedding function for
# similarity search; the model named here is an assumption and must match the
# one used when the store was built.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = Chroma(
    persist_directory="./bg_data_english",
    embedding_function=embeddings,
)
similarity_retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.2},
)

# Load the LLM
# quantization_config = BitsAndBytesConfig(load_in_8bit=True)  # optional 8-bit quantization
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
llm_model = AutoModelForCausalLM.from_pretrained("google/gemma-2-9b-it")
text_generation_pipeline = pipeline(
    task="text-generation",
    model=llm_model,
    tokenizer=tokenizer,
    return_full_text=False,
    max_new_tokens=350,
)
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Reformulate user queries with chat-history context so retrieval sees a
# standalone question
rephrase_system_prompt = (
    "Given a chat history and the latest user question which might reference "
    "context in the chat history, formulate a standalone question which can be "
    "understood without the chat history. Do NOT answer the question, just "
    "reformulate it if needed and otherwise return it as is."
)
rephrase_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", rephrase_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm, similarity_retriever, rephrase_prompt
)

# Define the question-answering system prompt
qa_system_prompt = """You are a saintly guide inspired by the teachings of the Bhagavad Gita, \
offering wisdom and moral guidance. Answer questions in a friendly and compassionate tone, \
drawing insights from the scripture to help users with their life challenges. Use the provided \
context to craft your response and remain faithful to the philosophy of the Bhagavad Gita. \
If you don't know the answer, humbly admit it or ask the user to clarify or provide more details. \
Limit your response to 5 lines unless the user explicitly asks for more explanation.
Question: {input}
Context: {context}
Answer: """
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
qa_rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

# Function to generate answers
chat_history = []


def chat(question):
    global chat_history
    response = qa_rag_chain.invoke({"input": question, "chat_history": chat_history})
    answer = response["answer"].strip()
    # Strip any role prefix the model echoes back
    if answer.startswith("Saintly Guide:"):
        answer = answer[len("Saintly Guide:"):].strip()
    elif answer.startswith("AI:"):
        answer = answer[len("AI:"):].strip()
    # Store the cleaned answer so the prefix does not leak back into the prompt
    chat_history.extend([HumanMessage(content=question), AIMessage(content=answer)])
    return answer


# Create the Gradio interface
interface = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(label="Ask your question", placeholder="What's troubling you?"),
    outputs=gr.Textbox(label="Answer"),
    title="Bhagavad Gita Chatbot",
    description="Ask questions inspired by the teachings of the Bhagavad Gita "
    "and receive saintly guidance.",
)

# Launch the app
if __name__ == "__main__":
    interface.launch()
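
# A hedged smoke test (not part of the original app): exercise the RAG chain
# directly, bypassing the Gradio UI, e.g. when debugging retrieval or prompt
# changes. The sample question is illustrative only; swap the body of the
# __main__ guard above for something like:
#
#     print(chat("What does the Gita teach about performing one's duty?"))
#     print(chat("Can you explain that in more detail?"))  # uses chat_history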