Mona-abdelazim committed on
Commit
a8efc23
1 Parent(s): 18aae43

Upload 4 files

Files changed (4)
  1. RAG_GGUF.py +82 -0
  2. alzaheimer.pdf +0 -0
  3. app.py +86 -0
  4. start.py +17 -0
RAG_GGUF.py ADDED
@@ -0,0 +1,82 @@
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_cpp import Llama


def RAG_Chain(pdf_file, question, llama_model):
    # Extract raw text from every page of the uploaded PDF
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    doc = ""
    for page_num in range(len(pdf_reader.pages)):
        page = pdf_reader.pages[page_num]
        doc += page.extract_text()

    # Check that the PDF contained any extractable text
    if not doc:
        raise ValueError("No text found. Please check the uploaded PDF.")

    # Split the loaded document into overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_text(doc)

    # Create HuggingFace embeddings for the vector store
    embedding_model_name = 'sentence-transformers/all-MiniLM-L6-v2'  # efficient model suitable for most tasks
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

    # Swap in pysqlite3 so Chroma sees a recent SQLite build, then clear its client cache
    __import__('pysqlite3')
    import sys
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
    import chromadb

    chromadb.api.client.SharedSystemClient.clear_system_cache()

    vectorstore = Chroma.from_texts(texts=splits, embedding=embeddings)

    # Define the retriever over the Chroma store
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    # Retrieve the documents most relevant to the question
    retrieved_docs = retriever.get_relevant_documents(question)
    if not retrieved_docs:
        return "No relevant information found in the documents."

    # Join the retrieved chunks into a single context block
    formatted_context = "\n\n".join(d.page_content for d in retrieved_docs)

    # Prepare the prompt for the LLM and generate the answer
    formatted_prompt = (
        f"Answer the question based on the context below.\n\n"
        f"Context:\n{formatted_context}\n\nQuestion: {question}\n\nAnswer:"
    )
    answer = llama_model(formatted_prompt)
    return answer["choices"][0]["text"]


# Instantiate the Llama model from the GGUF file
# (kept for reference; app.py performs the actual instantiation)
'''
model_path = "/home/mona/Downloads/Pubmed_model_GGUF"
llama_model = Llama(
    model_path,
    n_ctx=2048,        # Context length
    # n_threads=8,     # Number of CPU threads to use
    temperature=0.7,   # Sampling temperature
    n_gpu_layers=2
)
'''
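For reference, a minimal sketch of exercising RAG_Chain outside Streamlit. The question string is illustrative and not part of the commit; the model path is the one app.py hard-codes, and alzaheimer.pdf is the sample file added in this commit:

# Hypothetical smoke test for RAG_Chain; run from the repo root.
from llama_cpp import Llama
from RAG_GGUF import RAG_Chain

# Same GGUF path app.py uses; adjust to your local checkpoint.
llama_model = Llama(
    "/home/mona/Downloads/Pubmed_model_GGUF",
    n_ctx=2048,
    n_gpu_layers=2,
)

with open("alzaheimer.pdf", "rb") as pdf_file:
    # Example question only; PyPDF2.PdfReader accepts the open file object.
    answer = RAG_Chain(pdf_file, "What are the early symptoms of Alzheimer's disease?", llama_model)
    print(answer)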
alzaheimer.pdf ADDED
Binary file (14.3 kB)
 
app.py ADDED
@@ -0,0 +1,86 @@
import streamlit as st
import PyPDF2
from RAG_GGUF import RAG_Chain
from llama_cpp import Llama


# Read an uploaded PDF and return its text before sending it to the RAG pipeline
def read_pdf(file):
    pdf_reader = PyPDF2.PdfReader(file)
    text = ""
    for page_num in range(len(pdf_reader.pages)):
        page = pdf_reader.pages[page_num]
        text += page.extract_text()
    return text


st.title("Talk with Your PDF")

# PDF upload; user_input defaults to empty so the Send handler below
# never touches an undefined name when no file has been uploaded yet
user_input = ""
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
if uploaded_file is not None:
    # Display the file name
    st.write(f"File uploaded: {uploaded_file.name}")
    # Read and display the content of the uploaded PDF file
    try:
        pdf_content = read_pdf(uploaded_file)
        st.text_area("PDF Content", pdf_content, height=300)
    except Exception as e:
        st.error(f"Error reading PDF: {e}")
    # Input field for user messages
    user_input = st.text_input("You:", "")
else:
    st.text_area("PDF Content", "Please Upload File", height=300)

# Initialize session state for chat history and load the model once;
# keeping the Llama instance in session state means it survives
# Streamlit's script reruns instead of being lost after the first one
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
if 'llama_model' not in st.session_state:
    model_path = "/home/mona/Downloads/Pubmed_model_GGUF"
    st.session_state.llama_model = Llama(
        model_path,
        n_ctx=2048,        # Context length
        # n_threads=8,     # Number of CPU threads to use
        temperature=0.7,   # Sampling temperature
        n_gpu_layers=4
    )

# Handle user input
if st.button("Send"):
    if uploaded_file is not None and user_input:
        # Get the model response through the RAG chain
        gpt_response = RAG_Chain(uploaded_file, user_input, st.session_state.llama_model)

        # Store the conversation
        st.session_state.chat_history.append(("User", user_input))
        st.session_state.chat_history.append(("BOT", gpt_response))

# Display chat history
for speaker, message in st.session_state.chat_history:
    st.markdown(f"**{speaker}:** {message}")
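An alternative to stashing the Llama instance in st.session_state is st.cache_resource, which loads the model once per server process and reuses it across sessions and reruns. A minimal sketch, assuming the same model path as above:

import streamlit as st
from llama_cpp import Llama

@st.cache_resource
def load_model(model_path: str) -> Llama:
    # Loaded once per process; later reruns return the cached instance.
    return Llama(model_path, n_ctx=2048, n_gpu_layers=4)

llama_model = load_model("/home/mona/Downloads/Pubmed_model_GGUF")

This also avoids re-loading the model when a second browser tab opens the app, since the cache is shared process-wide rather than per session.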
start.py ADDED
@@ -0,0 +1,17 @@
# start.py
import os
import subprocess


def main():
    print("Starting the app!")
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # Launch Streamlit as a child process so this script stays interactive;
    # subprocess.run would block here until the app exits
    app_process = subprocess.Popen(["streamlit", "run", "app.py"])
    while True:
        command = input("Type 'exit' to quit or anything else to continue: ").lower()
        if command == 'exit':
            print("Exiting the app. Goodbye!")
            app_process.terminate()
            break
        else:
            print(f"You typed: {command}")


if __name__ == "__main__":
    main()
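If the streamlit executable is not on PATH (a common issue inside containers or virtualenvs), an alternative launcher calls Streamlit's CLI entry point from Python. A sketch, assuming a recent Streamlit; older releases exposed the same entry point as streamlit.cli:

# Hypothetical alternative to the subprocess launcher above.
import sys
from streamlit.web import cli as stcli

if __name__ == "__main__":
    sys.argv = ["streamlit", "run", "app.py"]
    sys.exit(stcli.main())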