ibagur committed on
Commit
7d21a01
1 Parent(s): 95f349d

simplified app.py

Browse files

Use only the pre-populated Chroma vector store
Remove the document sources

Reports/GBV AoR Strategy 2021-2025.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcee3331913a4cf854391589e2503a9c798059b856646b830bca3928d85f3b9e
3
- size 7629117
 
 
 
 
Reports/GBV_PocketGuide021718.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa1a2d24742803f7a2543b8f3297ab3c15ff3be96f2ebae2dfb5549c423ee2b7
3
- size 466663
 
 
 
 
Reports/UNFPA_GBV_E-Learning_Companion_Guide_ENGLISH.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d109398e90862628ee6eb38f564707b74c51cbf629cc7f3b7e829a5bab8705a
3
- size 20350214
 
 
 
 
Reports/gbv_me_toolkit_eng_2nd_edition_apr_21_en_final.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:733ff134d0bd39db05f91d18080a833dcdd2baf396520ef198a43c52fd591abb
3
- size 1978746
 
 
 
 
Reports/gbv_sc_sops_2018_english_final.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f753dde4b06dc354ba6769d18b063ca4a7c11034239b145285b2b7d02f5f97d0
3
- size 5599140
 
 
 
 
Reports/gbv_toolkit_book_01_20_2015_en.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:38ab25a323bec1bbb4efc3c9ce9f144a8613c0dbd03bbeb32808dcbc572481ac
3
- size 5088528
 
 
 
 
Reports/genderbased_eng.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:62674549a5f55638a93a365e22a869b3637fb37a2fed6f31f947f1829ef88186
3
- size 491998
 
 
 
 
Reports/handbook-for-coordinating-gbv-in-emergencies_fin.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f235cf3c6f21a5f2bc2589466bfd284cb633e0a684f786a089398ba79d603db8
3
- size 6877542
 
 
 
 
Reports/iasc_gender_handbook_2017.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:82c0127e6a48da59ec27634ac867a08a625ce5d7c1cbd261ecf0ae8a0e64b2ad
3
- size 6582298
 
 
 
 
app.py CHANGED
@@ -1,14 +1,11 @@
1
  from langchain.embeddings.openai import OpenAIEmbeddings
2
  from langchain.vectorstores import Chroma
3
- from langchain.text_splitter import RecursiveCharacterTextSplitter
4
- from langchain.agents import AgentExecutor, Tool, load_tools
5
- from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain, LLMChain, LLMMathChain
6
  from langchain.chat_models import ChatOpenAI
7
- from langchain.document_loaders import DirectoryLoader
8
  from langchain.memory import ConversationBufferMemory
9
  from langchain.utilities import WikipediaAPIWrapper
10
  from langchain.agents import initialize_agent, AgentType
11
- from langchain.document_loaders import WebBaseLoader
12
 
13
  import gradio as gr
14
 
@@ -18,48 +15,17 @@ from app_modules.presets import *
18
  import os
19
  os.environ["OPENAI_API_KEY"] = os.environ["OPENAI_TOKEN"]
20
 
21
- # Flag to load chroma store
22
- flag_chroma = True
23
-
24
  # Define the LLM chat model
25
  model = 'gpt-3.5-turbo'
26
  #model = 'gpt-4'
27
  temperature = 0
28
  llm = ChatOpenAI(temperature=temperature, model=model)
29
 
30
- # Check flag to load vectorstore
31
- if flag_chroma:
32
- # Load an existing database
33
- persist_dir = "./chroma"
34
- embeddings = OpenAIEmbeddings()
35
- vectorstore = Chroma(persist_directory=persist_dir, embedding_function=embeddings)
36
- vectorstore.persist()
37
- else:
38
- # Document and sources loader
39
- pdf_loader = DirectoryLoader('./Reports/', glob="**/*.pdf")
40
- txt_loader = DirectoryLoader('./Reports/', glob="**/*.txt")
41
- word_loader = DirectoryLoader('./Reports/', glob="**/*.docx")
42
- web_based_loader = WebBaseLoader(["https://www.unwomen.org/en/what-we-do/ending-violence-against-women/faqs/types-of-violence", "https://2021.gho.unocha.org/global-trends/gender-and-gender-based-violence-humanitarian-action/"])
43
-
44
- loaders = [pdf_loader, txt_loader, word_loader, web_based_loader]
45
- docs = []
46
- for loader in loaders:
47
- docs.extend(loader.load())
48
-
49
- # Text splitter
50
- ## If chunks are bigger than 1000, it recursively splits them until fitting them within size
51
- text_splitter = RecursiveCharacterTextSplitter(
52
- separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
53
- chunk_size = 1000,
54
- chunk_overlap = 50
55
- )
56
- documents = text_splitter.split_documents(docs)
57
-
58
- # Embed documents in Vectorstore
59
- persist_dir = "chroma"
60
- embeddings = OpenAIEmbeddings()
61
- vectorstore = Chroma.from_documents(documents, embeddings, persist_directory=persist_dir)
62
- vectorstore.persist()
63
 
64
  # Create Retrieval Chain with sources
65
  ## It returns a dictionary with at least the 'answer' and the 'sources'
@@ -132,7 +98,6 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme, title="GBV Q&A Bo
132
  history[-1][1] = ""
133
  for character in bot_message:
134
  history[-1][1] += character
135
- #time.sleep(0.05)
136
  yield history
137
 
138
  response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
 
1
  from langchain.embeddings.openai import OpenAIEmbeddings
2
  from langchain.vectorstores import Chroma
3
+ from langchain.agents import AgentExecutor, Tool
4
+ from langchain.chains import RetrievalQAWithSourcesChain
 
5
  from langchain.chat_models import ChatOpenAI
 
6
  from langchain.memory import ConversationBufferMemory
7
  from langchain.utilities import WikipediaAPIWrapper
8
  from langchain.agents import initialize_agent, AgentType
 
9
 
10
  import gradio as gr
11
 
 
15
  import os
16
  os.environ["OPENAI_API_KEY"] = os.environ["OPENAI_TOKEN"]
17
 
 
 
 
18
  # Define the LLM chat model
19
  model = 'gpt-3.5-turbo'
20
  #model = 'gpt-4'
21
  temperature = 0
22
  llm = ChatOpenAI(temperature=temperature, model=model)
23
 
24
+ # Load existing vectorstore
25
+ persist_dir = "./chroma"
26
+ embeddings = OpenAIEmbeddings()
27
+ vectorstore = Chroma(persist_directory=persist_dir, embedding_function=embeddings)
28
+ vectorstore.persist()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  # Create Retrieval Chain with sources
31
  ## It returns a dictionary with at least the 'answer' and the 'sources'
 
98
  history[-1][1] = ""
99
  for character in bot_message:
100
  history[-1][1] += character
 
101
  yield history
102
 
103
  response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(