valeriylo committed
Commit 4b442ac
1 Parent(s): cd053e7

Update app.py

Files changed (1):
  app.py (+23 -22)
app.py CHANGED
@@ -10,48 +10,46 @@ from langchain.chains import ConversationalRetrievalChain
 from htmlTemplates import css, bot_template, user_template
 from langchain.llms import HuggingFaceHub, LlamaCpp
 from huggingface_hub import snapshot_download, hf_hub_download
-#from prompts import CONDENSE_QUESTION_PROMPT
+
+# from prompts import CONDENSE_QUESTION_PROMPT
 
 repo_name = "IlyaGusev/saiga2_13b_gguf"
 model_name = "model-q4_K.gguf"
-
+
 snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)
 
+
 def get_pdf_text(pdf_docs):
-
     text = ""
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
         for page in pdf_reader.pages:
             text += page.extract_text()
-
+
     return text
 
 
 def get_text_chunks(text):
-
     text_splitter = CharacterTextSplitter(separator="\n",
-                                          chunk_size=500, #1000
-                                          chunk_overlap=30, #200
+                                          chunk_size=500,  # 1000
+                                          chunk_overlap=30,  # 200
                                           length_function=len
-                                          )
+                                          )
     chunks = text_splitter.split_text(text)
-
+
     return chunks
 
 
 def get_vectorstore(text_chunks):
-
-    #embeddings = OpenAIEmbeddings()
+    # embeddings = OpenAIEmbeddings()
    embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
-    #embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
+    # embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
-
+
     return vectorstore
 
 
 def get_conversation_chain(vectorstore, model_name):
-
     llm = LlamaCpp(model_path=model_name,
                    temperature=0.1,
                    top_k=30,
@@ -60,26 +58,27 @@ def get_conversation_chain(vectorstore, model_name):
                    n_ctx=2048,
                    n_parts=1,
                    echo=True
-                   )
-
-    #llm = ChatOpenAI()
+                   )
 
+    # llm = ChatOpenAI()
+
     memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
 
     conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm,
-                                                               #condense_question_prompt=CONDENSE_QUESTION_PROMPT,
+                                                               # condense_question_prompt=CONDENSE_QUESTION_PROMPT,
                                                                retriever=vectorstore.as_retriever(),
                                                                memory=memory,
                                                                return_source_documents=True
-                                                               )
+                                                               )
 
-    return conversation_chain, conversation_chain['source_documents'][0]
+    result = conversation_chain
+
+    return result, result['source_documents'][0]
 
 
 def handle_userinput(user_question):
-
     response = st.session_state.conversation({'question': user_question})
-
+
     st.session_state.chat_history = response['chat_history']
 
     for i, message in enumerate(st.session_state.chat_history):
@@ -90,6 +89,7 @@ def handle_userinput(user_question):
         st.write(bot_template.replace(
             "{{MSG}}", message.content), unsafe_allow_html=True)
 
+
 # main code
 load_dotenv()
 
@@ -126,3 +126,4 @@ with st.sidebar:
         # create conversation chain
         st.session_state.conversation, retrieved_docs = get_conversation_chain(vectorstore, model_name)
         st.text_area(retrieved_docs)
+
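
A note on the new return statement in get_conversation_chain: result is the ConversationalRetrievalChain object itself, which is not subscriptable, so result['source_documents'][0] raises a TypeError as soon as the function runs. With return_source_documents=True, the source documents come back on the response dict of a chain call, not on the chain object. A minimal sketch of the likely intended flow (assumptions: the chain built above is stored in st.session_state.conversation as in the sidebar code, and the "Sources" label is illustrative):

    # Sketch only: read the sources from a call's response, not from the chain.
    response = st.session_state.conversation({'question': user_question})
    answer = response['answer']
    source_docs = response['source_documents']  # list of retrieved Documents
    st.text_area("Sources", value=source_docs[0].page_content)

Note also that when the chain returns more than one output key, ConversationBufferMemory typically needs output_key='answer' to know which key to store, otherwise LangChain raises a "one output key expected" error.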