Spaces:
Sleeping
Sleeping
simplified app.py
Browse files
Use only pre-populated chroma
remove document sources
- Reports/GBV AoR Strategy 2021-2025.pdf +0 -3
- Reports/GBV_PocketGuide021718.pdf +0 -3
- Reports/UNFPA_GBV_E-Learning_Companion_Guide_ENGLISH.pdf +0 -3
- Reports/gbv_me_toolkit_eng_2nd_edition_apr_21_en_final.pdf +0 -3
- Reports/gbv_sc_sops_2018_english_final.pdf +0 -3
- Reports/gbv_toolkit_book_01_20_2015_en.pdf +0 -3
- Reports/genderbased_eng.pdf +0 -3
- Reports/handbook-for-coordinating-gbv-in-emergencies_fin.pdf +0 -3
- Reports/iasc_gender_handbook_2017.pdf +0 -3
- app.py +7 -42
Reports/GBV AoR Strategy 2021-2025.pdf
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:fcee3331913a4cf854391589e2503a9c798059b856646b830bca3928d85f3b9e
|
3 |
-
size 7629117
|
|
|
|
|
|
|
|
Reports/GBV_PocketGuide021718.pdf
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:fa1a2d24742803f7a2543b8f3297ab3c15ff3be96f2ebae2dfb5549c423ee2b7
|
3 |
-
size 466663
|
|
|
|
|
|
|
|
Reports/UNFPA_GBV_E-Learning_Companion_Guide_ENGLISH.pdf
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:9d109398e90862628ee6eb38f564707b74c51cbf629cc7f3b7e829a5bab8705a
|
3 |
-
size 20350214
|
|
|
|
|
|
|
|
Reports/gbv_me_toolkit_eng_2nd_edition_apr_21_en_final.pdf
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:733ff134d0bd39db05f91d18080a833dcdd2baf396520ef198a43c52fd591abb
|
3 |
-
size 1978746
|
|
|
|
|
|
|
|
Reports/gbv_sc_sops_2018_english_final.pdf
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:f753dde4b06dc354ba6769d18b063ca4a7c11034239b145285b2b7d02f5f97d0
|
3 |
-
size 5599140
|
|
|
|
|
|
|
|
Reports/gbv_toolkit_book_01_20_2015_en.pdf
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:38ab25a323bec1bbb4efc3c9ce9f144a8613c0dbd03bbeb32808dcbc572481ac
|
3 |
-
size 5088528
|
|
|
|
|
|
|
|
Reports/genderbased_eng.pdf
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:62674549a5f55638a93a365e22a869b3637fb37a2fed6f31f947f1829ef88186
|
3 |
-
size 491998
|
|
|
|
|
|
|
|
Reports/handbook-for-coordinating-gbv-in-emergencies_fin.pdf
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:f235cf3c6f21a5f2bc2589466bfd284cb633e0a684f786a089398ba79d603db8
|
3 |
-
size 6877542
|
|
|
|
|
|
|
|
Reports/iasc_gender_handbook_2017.pdf
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:82c0127e6a48da59ec27634ac867a08a625ce5d7c1cbd261ecf0ae8a0e64b2ad
|
3 |
-
size 6582298
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -1,14 +1,11 @@
|
|
1 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
2 |
from langchain.vectorstores import Chroma
|
3 |
-
from langchain.
|
4 |
-
from langchain.
|
5 |
-
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain, LLMChain, LLMMathChain
|
6 |
from langchain.chat_models import ChatOpenAI
|
7 |
-
from langchain.document_loaders import DirectoryLoader
|
8 |
from langchain.memory import ConversationBufferMemory
|
9 |
from langchain.utilities import WikipediaAPIWrapper
|
10 |
from langchain.agents import initialize_agent, AgentType
|
11 |
-
from langchain.document_loaders import WebBaseLoader
|
12 |
|
13 |
import gradio as gr
|
14 |
|
@@ -18,48 +15,17 @@ from app_modules.presets import *
|
|
18 |
import os
|
19 |
os.environ["OPENAI_API_KEY"] = os.environ["OPENAI_TOKEN"]
|
20 |
|
21 |
-
# Flag to load chroma store
|
22 |
-
flag_chroma = True
|
23 |
-
|
24 |
# Define the LLM chat model
|
25 |
model = 'gpt-3.5-turbo'
|
26 |
#model = 'gpt-4'
|
27 |
temperature = 0
|
28 |
llm = ChatOpenAI(temperature=temperature, model=model)
|
29 |
|
30 |
-
#
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
vectorstore = Chroma(persist_directory=persist_dir, embedding_function=embeddings)
|
36 |
-
vectorstore.persist()
|
37 |
-
else:
|
38 |
-
# Document and sources loader
|
39 |
-
pdf_loader = DirectoryLoader('./Reports/', glob="**/*.pdf")
|
40 |
-
txt_loader = DirectoryLoader('./Reports/', glob="**/*.txt")
|
41 |
-
word_loader = DirectoryLoader('./Reports/', glob="**/*.docx")
|
42 |
-
web_based_loader = WebBaseLoader(["https://www.unwomen.org/en/what-we-do/ending-violence-against-women/faqs/types-of-violence", "https://2021.gho.unocha.org/global-trends/gender-and-gender-based-violence-humanitarian-action/"])
|
43 |
-
|
44 |
-
loaders = [pdf_loader, txt_loader, word_loader, web_based_loader]
|
45 |
-
docs = []
|
46 |
-
for loader in loaders:
|
47 |
-
docs.extend(loader.load())
|
48 |
-
|
49 |
-
# Text splitter
|
50 |
-
## If chunks are bigger than 1000, it recursively splits them until fitting them within size
|
51 |
-
text_splitter = RecursiveCharacterTextSplitter(
|
52 |
-
separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
|
53 |
-
chunk_size = 1000,
|
54 |
-
chunk_overlap = 50
|
55 |
-
)
|
56 |
-
documents = text_splitter.split_documents(docs)
|
57 |
-
|
58 |
-
# Embed documents in Vectorstore
|
59 |
-
persist_dir = "chroma"
|
60 |
-
embeddings = OpenAIEmbeddings()
|
61 |
-
vectorstore = Chroma.from_documents(documents, embeddings, persist_directory=persist_dir)
|
62 |
-
vectorstore.persist()
|
63 |
|
64 |
# Create Retrieval Chain with sources
|
65 |
## It returns a dictionary with at least the 'answer' and the 'sources'
|
@@ -132,7 +98,6 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme, title="GBV Q&A Bo
|
|
132 |
history[-1][1] = ""
|
133 |
for character in bot_message:
|
134 |
history[-1][1] += character
|
135 |
-
#time.sleep(0.05)
|
136 |
yield history
|
137 |
|
138 |
response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
|
|
|
1 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
2 |
from langchain.vectorstores import Chroma
|
3 |
+
from langchain.agents import AgentExecutor, Tool
|
4 |
+
from langchain.chains import RetrievalQAWithSourcesChain
|
|
|
5 |
from langchain.chat_models import ChatOpenAI
|
|
|
6 |
from langchain.memory import ConversationBufferMemory
|
7 |
from langchain.utilities import WikipediaAPIWrapper
|
8 |
from langchain.agents import initialize_agent, AgentType
|
|
|
9 |
|
10 |
import gradio as gr
|
11 |
|
|
|
15 |
import os
|
16 |
os.environ["OPENAI_API_KEY"] = os.environ["OPENAI_TOKEN"]
|
17 |
|
|
|
|
|
|
|
18 |
# Define the LLM chat model
|
19 |
model = 'gpt-3.5-turbo'
|
20 |
#model = 'gpt-4'
|
21 |
temperature = 0
|
22 |
llm = ChatOpenAI(temperature=temperature, model=model)
|
23 |
|
24 |
+
# Load existing vectorstore
|
25 |
+
persist_dir = "./chroma"
|
26 |
+
embeddings = OpenAIEmbeddings()
|
27 |
+
vectorstore = Chroma(persist_directory=persist_dir, embedding_function=embeddings)
|
28 |
+
vectorstore.persist()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
# Create Retrieval Chain with sources
|
31 |
## It returns a dictionary with at least the 'answer' and the 'sources'
|
|
|
98 |
history[-1][1] = ""
|
99 |
for character in bot_message:
|
100 |
history[-1][1] += character
|
|
|
101 |
yield history
|
102 |
|
103 |
response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
|