import streamlit as st

from model.questionAnsweringBot import QuestionAnsweringBot
from model.retriever import Retriever


def _get_retriever():
    """Return the Retriever cached in the Streamlit session, building it on first use."""
    if "retriever" not in st.session_state:
        retriever = Retriever()
        print("Loading and preparing dataset...")
        retriever.load_and_prepare_dataset()
        retriever.prepare_bm25()
        retriever.compute_embeddings()
        # Cache only after every preparation step has succeeded. Storing the
        # instance up front would leave a half-initialised retriever in the
        # session if one of the steps raised, and later calls would then skip
        # preparation entirely.
        st.session_state.retriever = retriever
    return st.session_state.retriever


def process_query(llm_key, query, retrieval_method):
    """Retrieve documents for *query* and generate an answer from them.

    Args:
        llm_key: Value passed to ``QuestionAnsweringBot`` when constructing it.
        query: The user's question.
        retrieval_method: ``"BM25"`` selects BM25 retrieval; any other value
            selects semantic search.

    Returns:
        A ``(retrieved_docs, answer)`` tuple: the documents returned by the
        retriever and the answer produced by the bot for the built prompt.
    """
    retriever = _get_retriever()

    if retrieval_method == "BM25":
        print("Retrieving documents using BM25...")
        retrieved_docs = retriever.retrieve_documents_bm25(query)
    else:
        print("Retrieving documents using Semantic Search...")
        retrieved_docs = retriever.retrieve_documents_semantic(query)

    bot = QuestionAnsweringBot(llm_key)
    prompt = getPrompt(retrieved_docs, query)
    answer = bot.generate_answer(prompt)
    return retrieved_docs, answer


def getPrompt(retrieved_docs, query):
    """Build the LLM prompt: fixed rules, the numbered documents, then the query.

    Args:
        retrieved_docs: Iterable of documents; each is interpolated into the
            prompt with an f-string and numbered starting at 1.
        query: The user's question, appended after the documents.

    Returns:
        The complete prompt string.
    """
    # NOTE(review): the rules ask for a chunk number in citations, but only a
    # document number is added here; presumably each document's text carries
    # its own chunk label - confirm against the retriever's output.
    header = (
        "You are an LM integrated into an RAG system that answers questions based on provided documents.\n"
        "Rules:\n"
        "- Reply with the answer only and nothing but the answer.\n"
        "- Say 'I don't know' if you don't know the answer.\n"
        "- Use only the provided documents.\n"
        "- Citations are required. Include the document and chunk number in square brackets after the information (e.g., [Document 1, Chunk 2]).\n\n"
        "Documents:\n"
    )
    documents = "".join(
        f"Document {number}: {doc}\n"
        for number, doc in enumerate(retrieved_docs, start=1)
    )
    return f"{header}{documents}\nQuery: {query}\n"