yuvaranianandhan24 committed on
Commit
6f3d7e0
1 Parent(s): 57a93d9

Create app.py

Files changed (1)
  1. app.py +101 -0
app.py ADDED
@@ -0,0 +1,101 @@
+ import streamlit as st
+ from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate, Settings
+ from llama_index.llms.huggingface import HuggingFaceInferenceAPI
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+ from dotenv import load_dotenv
+ import os
+ import base64
+
+ # Load environment variables (expects HF_TOKEN in .env or the environment)
+ load_dotenv()
+
+ # Configure the global LlamaIndex settings
+ Settings.llm = HuggingFaceInferenceAPI(
+     model_name="meta-llama/Meta-Llama-3-8B-Instruct",
+     tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
+     context_window=3900,
+     token=os.getenv("HF_TOKEN"),
+     max_new_tokens=1000,
+     generate_kwargs={"temperature": 0.1},
+ )
+ Settings.embed_model = HuggingFaceEmbedding(
+     model_name="BAAI/bge-small-en-v1.5"
+ )
+
+ # Directories for persistent index storage and uploaded data
+ PERSIST_DIR = "./db"
+ DATA_DIR = "data"
+
+ # Ensure both directories exist
+ os.makedirs(DATA_DIR, exist_ok=True)
+ os.makedirs(PERSIST_DIR, exist_ok=True)
+
+ def displayPDF(file):
+     # Embed the PDF in the page as a base64-encoded iframe
+     with open(file, "rb") as f:
+         base64_pdf = base64.b64encode(f.read()).decode('utf-8')
+     pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
+     st.markdown(pdf_display, unsafe_allow_html=True)
+
+ def data_ingestion():
+     # Build a vector index over everything in DATA_DIR and persist it to disk
+     documents = SimpleDirectoryReader(DATA_DIR).load_data()
+     index = VectorStoreIndex.from_documents(documents)
+     index.storage_context.persist(persist_dir=PERSIST_DIR)
+
+ def handle_query(query):
+     # Reload the persisted index and query it with a custom QA prompt
+     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
+     index = load_index_from_storage(storage_context)
+     chat_text_qa_msgs = [
+         (
+             "user",
+             """You are a Q&A assistant. Your main goal is to provide answers as accurately as possible,
+             based on the instructions and context you have been given. If a question does not match the provided
+             context or is outside the scope of the document, kindly advise the user to ask questions within the
+             context of the document.
+             Context:
+             {context_str}
+             Question:
+             {query_str}
+             """
+         )
+     ]
+     text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
+     query_engine = index.as_query_engine(text_qa_template=text_qa_template)
+     answer = query_engine.query(query)
+
+     # The engine may return a Response object or a plain dict; handle both
+     if hasattr(answer, 'response'):
+         return answer.response
+     elif isinstance(answer, dict) and 'response' in answer:
+         return answer['response']
+     else:
+         return "Sorry, I couldn't find an answer."
+
+
+ # Streamlit app initialization
+ st.title("Chat with your PDF 🦜📄")
+ st.markdown("Chat here 👇")
+
+ if 'messages' not in st.session_state:
+     st.session_state.messages = [{'role': 'assistant', 'content': 'Hello! Upload a PDF and ask me anything about its content.'}]
+
+ with st.sidebar:
+     st.title("Menu:")
+     uploaded_file = st.file_uploader("Upload your PDF file and click the Submit & Process button")
+     if st.button("Submit & Process"):
+         if uploaded_file is None:
+             st.warning("Please upload a PDF first.")
+         else:
+             with st.spinner("Processing..."):
+                 filepath = os.path.join(DATA_DIR, "saved_pdf.pdf")
+                 with open(filepath, "wb") as f:
+                     f.write(uploaded_file.getbuffer())
+                 # displayPDF(filepath)  # Optionally display the uploaded PDF
+                 data_ingestion()  # Reprocess the PDF every time a new file is uploaded
+                 st.success("Done")
+
+ user_prompt = st.chat_input("Ask me anything about the content of the PDF:")
+ if user_prompt:
+     st.session_state.messages.append({'role': 'user', 'content': user_prompt})
+     response = handle_query(user_prompt)
+     st.session_state.messages.append({'role': 'assistant', 'content': response})
+
+ for message in st.session_state.messages:
+     with st.chat_message(message['role']):
+         st.write(message['content'])
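
A quick way to sanity-check the ingest-and-query pipeline without launching the Streamlit UI is a small standalone script. The sketch below is hypothetical and not part of this commit: it assumes HF_TOKEN is available in the environment or a .env file, that at least one PDF has already been copied into ./data (as the sidebar upload does), and that the same llama-index packages are installed; the filename and sample question are made up.

# query_check.py -- hypothetical smoke test; mirrors app.py's pipeline headlessly
import os
from dotenv import load_dotenv
from llama_index.core import (
    Settings, StorageContext, VectorStoreIndex,
    SimpleDirectoryReader, load_index_from_storage,
)
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

load_dotenv()

# Same model choices as app.py
Settings.llm = HuggingFaceInferenceAPI(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    token=os.getenv("HF_TOKEN"),
)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Mirrors data_ingestion(): build the index from ./data and persist it to ./db
documents = SimpleDirectoryReader("data").load_data()
VectorStoreIndex.from_documents(documents).storage_context.persist(persist_dir="./db")

# Mirrors handle_query(), but with the default QA prompt
storage_context = StorageContext.from_defaults(persist_dir="./db")
index = load_index_from_storage(storage_context)
print(index.as_query_engine().query("What is this document about?"))

Running `python query_check.py` should print a grounded answer if the token, models, and data directory are all wired up correctly.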