mikepastor11 committed on
Commit
b4ffaef
1 Parent(s): 6ec9cd5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -0
app.py CHANGED
@@ -7,7 +7,93 @@
7
  import streamlit as st
8
  from dotenv import load_dotenv
9
 
 
 
 
 
 
10
  from htmlTemplates import css, bot_template, user_template
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
 
13
 
 
7
  import streamlit as st
8
  from dotenv import load_dotenv
9
 
10
+ from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
11
+ from langchain.vectorstores import FAISS
12
+ from langchain.chat_models import ChatOpenAI
13
+ from langchain.memory import ConversationBufferMemory
14
+ from langchain.chains import ConversationalRetrievalChain
15
  from htmlTemplates import css, bot_template, user_template
16
+ from langchain.llms import HuggingFaceHub
17
+
18
+
19
def get_pdf_text(pdf_docs):
    """Return the extracted text of every page of every PDF as one string.

    Args:
        pdf_docs: Iterable of PDF sources; each item is handed unchanged to
            ``PdfReader``.

    Returns:
        The page texts joined in document order, then page order, with no
        separator. An empty string when ``pdf_docs`` is empty.
    """
    # Join once instead of growing a string with ``+=`` per page.
    # ``or ""`` keeps a page without extractable text (e.g. a scanned image)
    # from breaking the join should the reader hand back None for it.
    return "".join(
        page.extract_text() or ""
        for pdf in pdf_docs
        for page in PdfReader(pdf).pages
    )
26
+
27
+ # Chunk size and overlap must not exceed the model's capacity!
28
+ #
29
def get_text_chunks(text):
    """Split ``text`` into overlapping chunks with ``CharacterTextSplitter``.

    The splitter breaks on newline characters, targets chunks of 800
    characters (measured with ``len``) and carries 200 characters of overlap
    from one chunk into the next.

    Args:
        text: The full text to split.

    Returns:
        The chunks produced by ``CharacterTextSplitter.split_text``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 800,
        "chunk_overlap": 200,
        "length_function": len,
    }
    splitter = CharacterTextSplitter(**splitter_options)
    return splitter.split_text(text)
38
+
39
+
40
def get_vectorstore(text_chunks):
    """Embed ``text_chunks`` and index them in a FAISS vector store.

    Embeddings are produced by ``HuggingFaceInstructEmbeddings`` using the
    ``hkunlp/instructor-xl`` model. That backend needs the
    ``InstructorEmbedding`` package and ``sentence-transformers==2.2.2``
    to be installed.

    Args:
        text_chunks: The texts to embed and index.

    Returns:
        The store built by ``FAISS.from_texts``.
    """
    instructor_model = "hkunlp/instructor-xl"
    embedder = HuggingFaceInstructEmbeddings(model_name=instructor_model)
    # Progress marker: the embedding object has been constructed.
    print('have Embeddings: ')

    index = FAISS.from_texts(texts=text_chunks, embedding=embedder)
    # Progress marker: the index was built without raising.
    print('FAISS succeeds: ')
    return index
63
+
64
def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain backed by ``vectorstore``.

    The chain uses the ``google/flan-t5-xxl`` model through
    ``HuggingFaceHub`` (temperature 0.5, max_length 512) and a
    ``ConversationBufferMemory`` stored under the ``chat_history`` key with
    ``return_messages=True``.

    Args:
        vectorstore: Object whose ``as_retriever()`` supplies the retriever.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``.
    """
    hub_llm = HuggingFaceHub(
        repo_id="google/flan-t5-xxl",
        model_kwargs={"temperature": 0.5, "max_length": 512},
    )
    history = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=hub_llm,
        retriever=vectorstore.as_retriever(),
        memory=history,
    )
78
+
79
def handle_userinput(user_question):
    """Send ``user_question`` to the conversation chain and render the chat.

    The chain stored in ``st.session_state.conversation`` is called with the
    question, the returned ``chat_history`` is saved back into the session
    state, and every message in it is written to the page. Messages at even
    positions use ``user_template``, odd positions use ``bot_template``.

    Args:
        user_question: The text the user typed.

    Returns:
        None. When no conversation chain is available yet, a warning is
        shown and nothing else happens.
    """
    # Local import keeps this block self-contained without touching the
    # file's import section.
    import html

    # Asking before a chain has been stored would otherwise fail with an
    # AttributeError/TypeError on the call below.
    conversation = st.session_state.get("conversation")
    if conversation is None:
        st.warning("Please upload and process your documents before asking a question.")
        return

    response = conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        # The result is rendered with unsafe_allow_html=True, so the message
        # text must be escaped or it would be interpreted as markup.
        safe_content = html.escape(message.content)
        st.write(template.replace("{{MSG}}", safe_content), unsafe_allow_html=True)
96
+
97
 
98
 
99