Spaces:
Sleeping
Sleeping
# Import required libraries | |
import PyPDF2 | |
from getpass import getpass | |
from haystack.nodes import PreProcessor, PromptModel, PromptTemplate, PromptNode, AnswerParser | |
from haystack.document_stores import InMemoryDocumentStore | |
from haystack import Document, Pipeline | |
from haystack.nodes import BM25Retriever | |
from pprint import pprint | |
import streamlit as st | |
import logging | |
from dotenv import load_dotenv | |
load_dotenv() | |
import os | |
import logging | |
logging.basicConfig(level=logging.DEBUG) | |
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_path: Path of the PDF file to read; it is opened in binary mode.

    Returns:
        The text of all pages joined in page order. A page whose
        ``extract_text()`` result is falsy (``None`` or ``""``) contributes
        nothing, so the result can be an empty string.
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Iterate the pages directly and join once, rather than indexing by
        # page number and growing a string with += on every page.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# --- Knowledge base ---------------------------------------------------------
# Pull the raw text out of the bundled PDF and stop immediately if nothing
# could be extracted, since there would be nothing to retrieve from.
pdf_file_path = "Data/MR. MPROFY.pdf"
pdf_text = extract_text_from_pdf(pdf_file_path)
if not pdf_text:
    raise ValueError("No text extracted from PDF.")

# In-memory store with BM25 enabled; the whole PDF is written to it as a
# single Haystack document.
document_store = InMemoryDocumentStore(use_bm25=True)
doc = Document(content=pdf_text, meta={"name": "MR. MPROFY"})
document_store.write_documents([doc])

# BM25 retriever over that store, configured with top_k=2.
retriever = BM25Retriever(document_store=document_store, top_k=2)
# Prompt text for the question-answering step. The {join(documents)} and
# {query} placeholders are left for PromptTemplate to substitute
# (presumably with the retrieved documents and the user's question).
_QA_PROMPT_TEXT = """
Hi, I'm Mprofier, your friendly AI assistant. I'm here to provide direct and concise answers to your specific questions.
I won’t ask any follow-up questions myself.
If I can't find the answer in the provided context, I'll simply state that I don't have enough information to answer.
Context: {join(documents)};
Question: {query}
Answer:
"""

# Template whose model output is passed through AnswerParser.
qa_template = PromptTemplate(prompt=_QA_PROMPT_TEXT, output_parser=AnswerParser())
# Hugging Face API token, read from the environment. load_dotenv() has
# already run at the top of this file, so a value defined in a .env file is
# visible here too. (Assigning the name to itself, as before, raised
# NameError because HF_TOKEN was never bound.)
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    # Do not abort at import time; just make the missing credential obvious.
    logging.warning("HF_TOKEN is not set; requests to the hosted model may fail.")
# Extra options forwarded to the underlying model via model_kwargs
# (presumably the maximum combined prompt length -- confirm against Haystack).
_PROMPT_MODEL_KWARGS = {"model_max_length": 5000}

# Prompt node for the Mixtral instruct model, authenticated with HF_TOKEN and
# using qa_template as its default prompt.
prompt_node = PromptNode(
    model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
    default_prompt_template=qa_template,
    api_key=HF_TOKEN,
    model_kwargs=_PROMPT_MODEL_KWARGS,
    max_length=500,
)
# Wire the pipeline: Query -> retriever -> prompt_node.
rag_pipeline = Pipeline()
for _component, _node_name, _node_inputs in (
    (retriever, "retriever", ["Query"]),
    (prompt_node, "prompt_node", ["retriever"]),
):
    rag_pipeline.add_node(component=_component, name=_node_name, inputs=_node_inputs)
def run_streamlit_app():
    """Render the question form and display the pipeline's answer.

    Shows a title, a text input and a "Get Answer" button. When the button
    is pressed with a non-blank question, the question is run through
    ``rag_pipeline`` and the first returned answer is written to the page;
    "No answer found." is written when the pipeline returns no answers.
    A blank question produces a warning instead of a pipeline call.
    """
    st.title("Mprofier - AI Assistant")
    query_text = st.text_input("Enter your question:")

    if not st.button("Get Answer"):
        return

    # Do not send an empty or whitespace-only query to the retriever/LLM.
    question = query_text.strip()
    if not question:
        st.warning("Please enter a question first.")
        return

    response = rag_pipeline.run(query=question)
    # Tolerate a result without an "answers" key instead of raising KeyError.
    answers = (response or {}).get("answers")
    st.write(answers[0].answer if answers else "No answer found.")


# Start the Streamlit application
if __name__ == "__main__":
    run_streamlit_app()