valeriylo committed on
Commit
897ce6f
·
1 Parent(s): accd524

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -24
app.py CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
2
  from dotenv import load_dotenv
3
  from PyPDF2 import PdfReader
4
  from langchain.text_splitter import CharacterTextSplitter
5
- from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
6
  from langchain.vectorstores import FAISS
7
  from langchain.chat_models import ChatOpenAI
8
  from langchain.memory import ConversationBufferMemory
@@ -11,56 +11,63 @@ from htmlTemplates import css, bot_template, user_template
11
  from langchain.llms import HuggingFaceHub, LlamaCpp
12
  from huggingface_hub import snapshot_download, hf_hub_download
13
 
14
-
15
  repo_name = "IlyaGusev/saiga2_7b_gguf"
16
  model_name = "model-q2_K.gguf"
17
 
18
  snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)
19
 
20
  def get_pdf_text(pdf_docs):
 
21
  text = ""
22
  for pdf in pdf_docs:
23
  pdf_reader = PdfReader(pdf)
24
  for page in pdf_reader.pages:
25
  text += page.extract_text()
 
26
  return text
27
 
28
 
29
  def get_text_chunks(text):
30
- text_splitter = CharacterTextSplitter(
31
- separator="\n",
32
- chunk_size=1000,
33
- chunk_overlap=200,
34
- length_function=len
35
- )
36
  chunks = text_splitter.split_text(text)
 
37
  return chunks
38
 
39
 
40
  def get_vectorstore(text_chunks):
41
- embeddings = OpenAIEmbeddings()
42
- # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
 
 
43
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
 
44
  return vectorstore
45
 
46
 
47
  def get_conversation_chain(vectorstore, model_name):
48
 
49
- llm = LlamaCpp(model_path=model_name, n_ctx=2048)
50
  #llm = ChatOpenAI()
51
 
52
- memory = ConversationBufferMemory(
53
- memory_key='chat_history', return_messages=True)
54
- conversation_chain = ConversationalRetrievalChain.from_llm(
55
- llm=llm,
56
- retriever=vectorstore.as_retriever(),
57
- memory=memory
58
- )
59
  return conversation_chain
60
 
61
 
62
  def handle_userinput(user_question):
 
63
  response = st.session_state.conversation({'question': user_question})
 
64
  st.session_state.chat_history = response['chat_history']
65
 
66
  for i, message in enumerate(st.session_state.chat_history):
@@ -71,8 +78,9 @@ def handle_userinput(user_question):
71
  st.write(bot_template.replace(
72
  "{{MSG}}", message.content), unsafe_allow_html=True)
73
 
74
-
75
  load_dotenv()
 
76
  st.set_page_config(page_title="Chat with multiple PDFs",
77
  page_icon=":books:")
78
  st.write(css, unsafe_allow_html=True)
@@ -84,6 +92,7 @@ if "chat_history" not in st.session_state:
84
 
85
  st.header("Chat with multiple PDFs :books:")
86
  user_question = st.text_input("Ask a question about your documents:")
 
87
  if user_question:
88
  handle_userinput(user_question)
89
 
@@ -103,8 +112,4 @@ with st.sidebar:
103
  vectorstore = get_vectorstore(text_chunks)
104
 
105
  # create conversation chain
106
- st.session_state.conversation = get_conversation_chain(
107
- vectorstore, model_name)
108
-
109
-
110
-
 
2
  from dotenv import load_dotenv
3
  from PyPDF2 import PdfReader
4
  from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings
6
  from langchain.vectorstores import FAISS
7
  from langchain.chat_models import ChatOpenAI
8
  from langchain.memory import ConversationBufferMemory
 
11
  from langchain.llms import HuggingFaceHub, LlamaCpp
12
  from huggingface_hub import snapshot_download, hf_hub_download
13
 
 
14
  repo_name = "IlyaGusev/saiga2_7b_gguf"
15
  model_name = "model-q2_K.gguf"
16
 
17
  snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)
18
 
19
  def get_pdf_text(pdf_docs):
20
+
21
  text = ""
22
  for pdf in pdf_docs:
23
  pdf_reader = PdfReader(pdf)
24
  for page in pdf_reader.pages:
25
  text += page.extract_text()
26
+
27
  return text
28
 
29
 
30
  def get_text_chunks(text):
31
+
32
+ text_splitter = CharacterTextSplitter(separator="\n",
33
+ chunk_size=1000,
34
+ chunk_overlap=200,
35
+ length_function=len
36
+ )
37
  chunks = text_splitter.split_text(text)
38
+
39
  return chunks
40
 
41
 
42
  def get_vectorstore(text_chunks):
43
+
44
+ #embeddings = OpenAIEmbeddings()
45
+ #embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
46
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
47
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
48
+
49
  return vectorstore
50
 
51
 
52
  def get_conversation_chain(vectorstore, model_name):
53
 
54
+ llm = LlamaCpp(model_path=model_name, n_ctx=2048, n_parts=1)
55
  #llm = ChatOpenAI()
56
 
57
+ memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
58
+
59
+ conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm,
60
+ retriever=vectorstore.as_retriever(),
61
+ memory=memory
62
+ )
63
+
64
  return conversation_chain
65
 
66
 
67
  def handle_userinput(user_question):
68
+
69
  response = st.session_state.conversation({'question': user_question})
70
+
71
  st.session_state.chat_history = response['chat_history']
72
 
73
  for i, message in enumerate(st.session_state.chat_history):
 
78
  st.write(bot_template.replace(
79
  "{{MSG}}", message.content), unsafe_allow_html=True)
80
 
81
+ # main code
82
  load_dotenv()
83
+
84
  st.set_page_config(page_title="Chat with multiple PDFs",
85
  page_icon=":books:")
86
  st.write(css, unsafe_allow_html=True)
 
92
 
93
  st.header("Chat with multiple PDFs :books:")
94
  user_question = st.text_input("Ask a question about your documents:")
95
+
96
  if user_question:
97
  handle_userinput(user_question)
98
 
 
112
  vectorstore = get_vectorstore(text_chunks)
113
 
114
  # create conversation chain
115
+ st.session_state.conversation = get_conversation_chain(vectorstore, model_name)