Mona-abdelazim committed on
Commit
a8efc23
1 Parent(s): 18aae43

Upload 4 files

Files changed (4)
  1. RAG_GGUF.py +82 -0
  2. alzaheimer.pdf +0 -0
  3. app.py +86 -0
  4. start.py +17 -0
RAG_GGUF.py ADDED
@@ -0,0 +1,82 @@
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_cpp import Llama


def RAG_Chain(pdf_file, question, llama_model):
    # Extract raw text from every page of the uploaded PDF
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    doc = ""
    for page_num in range(len(pdf_reader.pages)):
        page = pdf_reader.pages[page_num]
        doc += page.extract_text()

    # Check that the PDF contained any extractable text
    if not doc:
        raise ValueError("No text found. Please check the uploaded PDF.")

    # Split the loaded document into overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_text(doc)

    # Create HuggingFace embeddings for the vector store
    embedding_model_name = 'sentence-transformers/all-MiniLM-L6-v2'  # efficient model suitable for most tasks
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

    # Swap in pysqlite3 so Chroma sees a recent SQLite build, then clear its client cache
    __import__('pysqlite3')
    import sys
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
    import chromadb

    chromadb.api.client.SharedSystemClient.clear_system_cache()

    vectorstore = Chroma.from_texts(texts=splits, embedding=embeddings)

    # Define the retriever over the Chroma store
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    # Retrieve the documents most relevant to the question
    retrieved_docs = retriever.get_relevant_documents(question)
    if not retrieved_docs:
        return "No relevant information found in the documents."

    # Join the retrieved chunks into a single context block
    formatted_context = "\n\n".join(d.page_content for d in retrieved_docs)

    # Prepare the prompt for the LLM and generate the answer
    formatted_prompt = (
        f"Answer the question based on the context below.\n\n"
        f"Context:\n{formatted_context}\n\nQuestion: {question}\n\nAnswer:"
    )
    answer = llama_model(formatted_prompt)
    return answer["choices"][0]["text"]


# Instantiate the Llama model from the GGUF file
# (kept for reference; app.py performs the actual instantiation)
'''
model_path = "/home/mona/Downloads/Pubmed_model_GGUF"
llama_model = Llama(
    model_path,
    n_ctx=2048,        # Context length
    # n_threads=8,     # Number of CPU threads to use
    temperature=0.7,   # Sampling temperature
    n_gpu_layers=2
)
'''
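For reference, a minimal sketch of exercising RAG_Chain outside Streamlit. The question string is illustrative and not part of the commit; the model path is the one app.py hard-codes, and alzaheimer.pdf is the sample file added in this commit:

# Hypothetical smoke test for RAG_Chain; run from the repo root.
from llama_cpp import Llama
from RAG_GGUF import RAG_Chain

# Same GGUF path app.py uses; adjust to your local checkpoint.
llama_model = Llama(
    "/home/mona/Downloads/Pubmed_model_GGUF",
    n_ctx=2048,
    n_gpu_layers=2,
)

with open("alzaheimer.pdf", "rb") as pdf_file:
    # Example question only; PyPDF2.PdfReader accepts the open file object.
    answer = RAG_Chain(pdf_file, "What are the early symptoms of Alzheimer's disease?", llama_model)
    print(answer)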
alzaheimer.pdf ADDED
Binary file (14.3 kB)
 
app.py ADDED
@@ -0,0 +1,86 @@
import streamlit as st
import PyPDF2
from RAG_GGUF import RAG_Chain
from llama_cpp import Llama


# Read an uploaded PDF and return its text before sending it to the RAG pipeline
def read_pdf(file):
    pdf_reader = PyPDF2.PdfReader(file)
    text = ""
    for page_num in range(len(pdf_reader.pages)):
        page = pdf_reader.pages[page_num]
        text += page.extract_text()
    return text


st.title("Talk with Your PDF")

# PDF upload; user_input defaults to empty so the Send handler below
# never touches an undefined name when no file has been uploaded yet
user_input = ""
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
if uploaded_file is not None:
    # Display the file name
    st.write(f"File uploaded: {uploaded_file.name}")
    # Read and display the content of the uploaded PDF file
    try:
        pdf_content = read_pdf(uploaded_file)
        st.text_area("PDF Content", pdf_content, height=300)
    except Exception as e:
        st.error(f"Error reading PDF: {e}")
    # Input field for user messages
    user_input = st.text_input("You:", "")
else:
    st.text_area("PDF Content", "Please Upload File", height=300)

# Initialize session state for chat history and load the model once;
# keeping the Llama instance in session state means it survives
# Streamlit's script reruns instead of being lost after the first one
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
if 'llama_model' not in st.session_state:
    model_path = "/home/mona/Downloads/Pubmed_model_GGUF"
    st.session_state.llama_model = Llama(
        model_path,
        n_ctx=2048,        # Context length
        # n_threads=8,     # Number of CPU threads to use
        temperature=0.7,   # Sampling temperature
        n_gpu_layers=4
    )

# Handle user input
if st.button("Send"):
    if uploaded_file is not None and user_input:
        # Get the model response through the RAG chain
        gpt_response = RAG_Chain(uploaded_file, user_input, st.session_state.llama_model)

        # Store the conversation
        st.session_state.chat_history.append(("User", user_input))
        st.session_state.chat_history.append(("BOT", gpt_response))

# Display chat history
for speaker, message in st.session_state.chat_history:
    st.markdown(f"**{speaker}:** {message}")
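An alternative to stashing the Llama instance in st.session_state is st.cache_resource, which loads the model once per server process and reuses it across sessions and reruns. A minimal sketch, assuming the same model path as above:

import streamlit as st
from llama_cpp import Llama

@st.cache_resource
def load_model(model_path: str) -> Llama:
    # Loaded once per process; later reruns return the cached instance.
    return Llama(model_path, n_ctx=2048, n_gpu_layers=4)

llama_model = load_model("/home/mona/Downloads/Pubmed_model_GGUF")

This also avoids re-loading the model when a second browser tab opens the app, since the cache is shared process-wide rather than per session.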
start.py ADDED
@@ -0,0 +1,17 @@
# start.py
import os
import subprocess


def main():
    print("Starting the app!")
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # Launch Streamlit as a child process so this script stays interactive;
    # subprocess.run would block here until the app exits
    app_process = subprocess.Popen(["streamlit", "run", "app.py"])
    while True:
        command = input("Type 'exit' to quit or anything else to continue: ").lower()
        if command == 'exit':
            print("Exiting the app. Goodbye!")
            app_process.terminate()
            break
        else:
            print(f"You typed: {command}")


if __name__ == "__main__":
    main()
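If the streamlit executable is not on PATH (a common issue inside containers or virtualenvs), an alternative launcher calls Streamlit's CLI entry point from Python. A sketch, assuming a recent Streamlit; older releases exposed the same entry point as streamlit.cli:

# Hypothetical alternative to the subprocess launcher above.
import sys
from streamlit.web import cli as stcli

if __name__ == "__main__":
    sys.argv = ["streamlit", "run", "app.py"]
    sys.exit(stcli.main())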