mcarthuradal committed on
Commit
0eb5c98
1 Parent(s): 4214a3b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -0
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from dotenv import load_dotenv
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter as CSplitter
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from pypdf import PdfReader
8
+
9
def get_pdf_text(docs):
    """Concatenate the extracted text of every page of every PDF in *docs*.

    Args:
        docs: Iterable of PDF sources that ``PdfReader`` can open (``main``
            passes the files returned by ``st.file_uploader``). ``None`` or
            an empty iterable produces an empty string.

    Returns:
        One string holding the text of all pages, in document order and then
        page order.
    """
    # ``or ""`` keeps the join safe if a page yields no text at all.
    return "".join(
        page.extract_text() or ""
        for pdf in (docs or ())
        for page in PdfReader(pdf).pages
    )
15
+
16
def get_text_chunks(text):
    """Split *text* into overlapping chunks.

    The splitter breaks on newlines, targets chunks of 1000 units with a
    200-unit overlap between neighbours, and measures length with ``len``.

    Args:
        text: The string to split.

    Returns:
        Whatever ``split_text`` of the configured splitter returns for *text*.
    """
    return CSplitter(
        chunk_overlap=200,
        chunk_size=1000,
        length_function=len,
        separator="\n",
    ).split_text(text)
25
+
26
def get_embeddings():
    """Create the HuggingFace embedding model used to vectorise text chunks.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for the
        ``sentence-transformers/all-mpnet-base-v2`` model, configured to run
        on the CPU with embedding normalisation turned off.
    """
    settings = {
        "model_name": "sentence-transformers/all-mpnet-base-v2",
        "model_kwargs": {"device": "cpu"},
        "encode_kwargs": {"normalize_embeddings": False},
    }
    return HuggingFaceEmbeddings(**settings)
36
+
37
def get_vectorstore(chunks):
    """Embed *chunks* and index them in a FAISS vector store.

    Args:
        chunks: List of text strings to embed (e.g. the output of
            ``get_text_chunks``).

    Returns:
        The FAISS vector store built from *chunks*.
    """
    embeddings = get_embeddings()
    # ``from_texts`` takes the strings as ``texts``; the previous ``text=``
    # keyword left the required argument unfilled.
    # NOTE(review): an earlier revision also called ``get_conversation_chain``
    # here, but no such function exists in this file and its result was never
    # used. Build the conversation chain from the returned store instead.
    return FAISS.from_texts(texts=chunks, embedding=embeddings)
41
+
42
+
43
def main():
    """Render the Streamlit page and process uploaded PDFs on demand.

    Loads environment variables, draws the page header and question box, and
    offers a sidebar uploader. Pressing "Process" extracts the text of the
    uploaded files, splits it into chunks and writes the chunks to the page.
    """
    load_dotenv()
    st.set_page_config(page_title="IDSR Chat", page_icon=":books:")
    st.header("IntelSurv Chat")
    # NOTE(review): the question is collected but not used anywhere yet.
    st.text_input("Ask a question")

    # NOTE(review): indentation was reconstructed; the button is assumed to
    # live in the sidebar next to the uploader -- confirm against the app.
    with st.sidebar:
        st.subheader("TG for IDSR Booklet")
        docs = st.file_uploader("Upload booklet here", accept_multiple_files=True)

        if st.button("Process"):
            if not docs:
                # Nothing uploaded: tell the user instead of failing later.
                st.warning("Upload at least one PDF before processing.")
            else:
                with st.spinner("Processing"):
                    # ``or ""`` keeps the splitter input a string even when
                    # extraction yields nothing.
                    raw_text = get_pdf_text(docs) or ""

                    # The extracted text must be passed to the splitter.
                    chunks = get_text_chunks(raw_text)
                    st.write(chunks)
59
+
60
+
61
+
62
+
63
+
64
+
65
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    main()