mprateek committed on
Commit
de24c66
1 Parent(s): 84e38cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -15
app.py CHANGED
@@ -7,9 +7,10 @@ from llama_index.core import Settings
7
  import os
8
  import base64
9
 
10
-
11
  load_dotenv()
12
 
 
13
  Settings.llm = HuggingFaceInferenceAPI(
14
  model_name="google/gemma-1.1-7b-it",
15
  tokenizer_name="google/gemma-1.1-7b-it",
@@ -22,21 +23,21 @@ Settings.embed_model = HuggingFaceEmbedding(
22
  model_name="BAAI/bge-small-en-v1.5"
23
  )
24
 
25
- # Define the directory for persistent storage and data
26
  PERSIST_DIR = "./db"
27
  DATA_DIR = "data"
28
 
29
- # Ensure data directory exists
30
  os.makedirs(DATA_DIR, exist_ok=True)
31
  os.makedirs(PERSIST_DIR, exist_ok=True)
32
 
33
- def displayPDF(file):
34
  with open(file, "rb") as f:
35
  base64_pdf = base64.b64encode(f.read()).decode('utf-8')
36
  pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
37
  st.markdown(pdf_display, unsafe_allow_html=True)
38
 
39
- def data_ingestion():
40
  documents = SimpleDirectoryReader(DATA_DIR).load_data()
41
  storage_context = StorageContext.from_defaults()
42
  index = VectorStoreIndex.from_documents(documents)
@@ -48,7 +49,7 @@ def handle_query(query):
48
  chat_text_qa_msgs = [
49
  (
50
  "user",
51
- """You are a Q&A chatbot named Chatbot, created by Prateek. your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
52
  Context:
53
  {context_str}
54
  Question:
@@ -68,14 +69,12 @@ def handle_query(query):
68
  else:
69
  return "Sorry, I couldn't find an answer."
70
 
71
-
72
- # Streamlit app initialization
73
- st.title("Chat with your PDF")
74
- st.markdown("Built by PRATEEK(https://github.com/theSuriya)")
75
-
76
 
77
  if 'messages' not in st.session_state:
78
- st.session_state.messages = [{'role': 'assistant', "content": 'Chat to PDF'}]
79
 
80
  with st.sidebar:
81
  st.title("Menu:")
@@ -85,7 +84,7 @@ with st.sidebar:
85
  filepath = "data/saved_pdf.pdf"
86
  with open(filepath, "wb") as f:
87
  f.write(uploaded_file.getbuffer())
88
- data_ingestion()
89
  st.success("Done")
90
 
91
  user_prompt = st.chat_input("Query")
@@ -95,5 +94,4 @@ if user_prompt:
95
  st.session_state.messages.append({'role': 'assistant', "content": response})
96
 
97
  for message in st.session_state.messages:
98
- with st.chat_message(message['role']):
99
- st.write(message['content'])
 
7
  import os
8
  import base64
9
 
10
+ # Load environment variables
11
  load_dotenv()
12
 
13
+ # Configure LLM and Embedding settings
14
  Settings.llm = HuggingFaceInferenceAPI(
15
  model_name="google/gemma-1.1-7b-it",
16
  tokenizer_name="google/gemma-1.1-7b-it",
 
23
  model_name="BAAI/bge-small-en-v1.5"
24
  )
25
 
26
+ # Define directory paths
27
  PERSIST_DIR = "./db"
28
  DATA_DIR = "data"
29
 
30
+ # Create directories if they don't exist
31
  os.makedirs(DATA_DIR, exist_ok=True)
32
  os.makedirs(PERSIST_DIR, exist_ok=True)
33
 
34
+ def display_pdf(file):
35
  with open(file, "rb") as f:
36
  base64_pdf = base64.b64encode(f.read()).decode('utf-8')
37
  pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
38
  st.markdown(pdf_display, unsafe_allow_html=True)
39
 
40
+ def ingest_data():
41
  documents = SimpleDirectoryReader(DATA_DIR).load_data()
42
  storage_context = StorageContext.from_defaults()
43
  index = VectorStoreIndex.from_documents(documents)
 
49
  chat_text_qa_msgs = [
50
  (
51
  "user",
52
+ """You are a Q&A chatbot created by Prateek Mohan. Your main goal is to provide accurate answers based on the given context. If a question is outside the scope of the document, kindly advise the user to ask within the context.
53
  Context:
54
  {context_str}
55
  Question:
 
69
  else:
70
  return "Sorry, I couldn't find an answer."
71
 
72
+ # Streamlit app
73
+ st.title("Talk to your PDF")
74
+ st.markdown("by Prateek Mohan (https://github.com/prtkmhn/)")
 
 
75
 
76
  if 'messages' not in st.session_state:
77
+ st.session_state.messages = [{'role': 'system', "content": 'Chat to PDF'}]
78
 
79
  with st.sidebar:
80
  st.title("Menu:")
 
84
  filepath = "data/saved_pdf.pdf"
85
  with open(filepath, "wb") as f:
86
  f.write(uploaded_file.getbuffer())
87
+ ingest_data()
88
  st.success("Done")
89
 
90
  user_prompt = st.chat_input("Query")
 
94
  st.session_state.messages.append({'role': 'assistant', "content": response})
95
 
96
  for message in st.session_state.messages:
97
+ st.write(message['content'])