Spaces:
Sleeping
Sleeping
Merge pull request #15 from almutareb/save_conversations
Browse files
app.py
CHANGED
@@ -6,10 +6,38 @@ from innovation_pathfinder_ai.source_container.container import (
|
|
6 |
from innovation_pathfinder_ai.utils.utils import extract_urls
|
7 |
from innovation_pathfinder_ai.utils import logger
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
logger = logger.get_console_logger("app")
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
if __name__ == "__main__":
|
12 |
|
|
|
|
|
|
|
|
|
13 |
def add_text(history, text):
|
14 |
history = history + [(text, None)]
|
15 |
return history, ""
|
@@ -30,6 +58,18 @@ if __name__ == "__main__":
|
|
30 |
"chat_history": history
|
31 |
}
|
32 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
return result
|
34 |
|
35 |
def vote(data: gr.LikeData):
|
|
|
6 |
from innovation_pathfinder_ai.utils.utils import extract_urls
|
7 |
from innovation_pathfinder_ai.utils import logger
|
8 |
|
9 |
+
from innovation_pathfinder_ai.utils.utils import (
|
10 |
+
generate_uuid
|
11 |
+
)
|
12 |
+
from langchain_community.vectorstores import Chroma
|
13 |
+
|
14 |
+
import chromadb
|
15 |
+
import dotenv
|
16 |
+
import os
|
17 |
+
|
18 |
+
dotenv.load_dotenv()
|
19 |
+
|
20 |
logger = logger.get_console_logger("app")
|
21 |
|
22 |
+
def initialize_chroma_db() -> "chromadb.Collection":
    """Open (or create) the persistent Chroma collection used for conversation memory.

    The collection name is read from the ``CONVERSATION_COLLECTION_NAME``
    environment variable (see example.env).

    Returns:
        chromadb.Collection: the conversation-memory collection.
        NOTE: despite the original ``-> Chroma`` annotation, this returns a raw
        chromadb collection, not a langchain ``Chroma`` vector store — the
        annotation was wrong and is corrected here.

    Raises:
        ValueError: if ``CONVERSATION_COLLECTION_NAME`` is not set, so a
        misconfigured environment fails with a clear message instead of a
        confusing error from chromadb.
    """
    collection_name = os.getenv("CONVERSATION_COLLECTION_NAME")
    if not collection_name:
        raise ValueError(
            "CONVERSATION_COLLECTION_NAME environment variable is not set"
        )

    # PersistentClient with no path uses chromadb's default on-disk location.
    client = chromadb.PersistentClient()

    collection = client.get_or_create_collection(
        name=collection_name,
    )

    return collection
|
32 |
+
|
33 |
+
|
34 |
+
|
35 |
if __name__ == "__main__":
|
36 |
|
37 |
+
current_id = generate_uuid()
|
38 |
+
|
39 |
+
db = initialize_chroma_db()
|
40 |
+
|
41 |
def add_text(history, text):
    """Append the user's message to the chat history with no bot reply yet.

    Args:
        history: list of (user_message, bot_message) tuples.
        text: the new user message.

    Returns:
        tuple: (new history list, empty string to clear the input box).
    """
    updated_history = history + [(text, None)]
    return updated_history, ""
|
|
|
58 |
"chat_history": history
|
59 |
}
|
60 |
)
|
61 |
+
|
62 |
+
db.add(
|
63 |
+
ids=[current_id],
|
64 |
+
documents=[result['output']],
|
65 |
+
metadatas=[
|
66 |
+
{
|
67 |
+
"query":query,
|
68 |
+
"intermediate_steps":result['intermediate_steps'].__str__()
|
69 |
+
}
|
70 |
+
]
|
71 |
+
)
|
72 |
+
|
73 |
return result
|
74 |
|
75 |
def vote(data: gr.LikeData):
|
example.env
CHANGED
@@ -8,4 +8,6 @@ OLLMA_BASE_URL=
|
|
8 |
SERPAPI_API_KEY=
|
9 |
|
10 |
# for chromadb
|
11 |
-
VECTOR_DATABASE_LOCATION=
|
|
|
|
|
|
8 |
SERPAPI_API_KEY=
|
9 |
|
10 |
# for chromadb
|
11 |
+
VECTOR_DATABASE_LOCATION=
|
12 |
+
|
13 |
+
CONVERSATION_COLLECTION_NAME="ConversationMemory"
|
innovation_pathfinder_ai/structured_tools/structured_tools.py
CHANGED
@@ -94,6 +94,7 @@ def wikipedia_search(query: str) -> str:
|
|
94 |
@tool
|
95 |
def chroma_search(query:str) -> str:
|
96 |
"""Search the Arxiv vector store for docmunets and relevent chunks"""
|
|
|
97 |
client = chromadb.PersistentClient(
|
98 |
# path=persist_directory,
|
99 |
)
|
@@ -155,4 +156,29 @@ def embed_arvix_paper(paper_id:str) -> None:
|
|
155 |
collection_name=collection_name,
|
156 |
pdf_file_location=full_path,
|
157 |
)
|
158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
@tool
|
95 |
def chroma_search(query:str) -> str:
|
96 |
"""Search the Arxiv vector store for docmunets and relevent chunks"""
|
97 |
+
# Since we have more than one collections we should change the name of this tool
|
98 |
client = chromadb.PersistentClient(
|
99 |
# path=persist_directory,
|
100 |
)
|
|
|
156 |
collection_name=collection_name,
|
157 |
pdf_file_location=full_path,
|
158 |
)
|
159 |
+
|
160 |
+
@tool
def conversational_search(query: str) -> str:
    """Search past conversations for documents and relevant chunks."""
    # NOTE(review): since we have more than one collection, this tool's name
    # should probably be made more specific — kept as-is to preserve the
    # agent-facing interface.
    client = chromadb.PersistentClient(
        # path=persist_directory,
    )

    # Collection name comes from the environment (see example.env).
    collection_name = os.getenv("CONVERSATION_COLLECTION_NAME")

    embedding_function = SentenceTransformerEmbeddings(
        model_name="all-MiniLM-L6-v2",
    )

    # Wrap the raw chromadb client in a langchain vector store so we can use
    # the retriever interface.
    vector_db = Chroma(
        client=client,  # client for Chroma
        collection_name=collection_name,
        embedding_function=embedding_function,
    )

    retriever = vector_db.as_retriever()
    docs = retriever.get_relevant_documents(query)

    # str(docs) is the idiomatic spelling of docs.__str__()
    return str(docs)
|
innovation_pathfinder_ai/utils/utils.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import hashlib
|
2 |
import datetime
|
3 |
import os
|
|
|
4 |
|
5 |
from innovation_pathfinder_ai.utils import logger
|
6 |
|
@@ -182,4 +183,13 @@ def create_folder_if_not_exists(folder_path: str) -> None:
|
|
182 |
os.makedirs(folder_path)
|
183 |
print(f"Folder '{folder_path}' created.")
|
184 |
else:
|
185 |
-
print(f"Folder '{folder_path}' already exists.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import hashlib
|
2 |
import datetime
|
3 |
import os
|
4 |
+
import uuid
|
5 |
|
6 |
from innovation_pathfinder_ai.utils import logger
|
7 |
|
|
|
183 |
os.makedirs(folder_path)
|
184 |
print(f"Folder '{folder_path}' created.")
|
185 |
else:
|
186 |
+
print(f"Folder '{folder_path}' already exists.")
|
187 |
+
|
188 |
+
def generate_uuid() -> str:
    """Return a freshly generated random UUID (version 4) as a string.

    Returns:
        str: A UUID string.
    """
    new_id = uuid.uuid4()
    return str(new_id)
|
innovation_pathfinder_ai/vector_store/chroma_vector_store.py
CHANGED
@@ -21,8 +21,10 @@ from langchain_community.vectorstores import Chroma
|
|
21 |
from langchain_community.embeddings.sentence_transformer import (
|
22 |
SentenceTransformerEmbeddings,
|
23 |
)
|
|
|
|
|
|
|
24 |
|
25 |
-
import uuid
|
26 |
import dotenv
|
27 |
import os
|
28 |
|
@@ -31,14 +33,7 @@ dotenv.load_dotenv()
|
|
31 |
|
32 |
VECTOR_DATABASE_LOCATION = os.getenv("VECTOR_DATABASE_LOCATION")
|
33 |
|
34 |
-
def generate_uuid() -> str:
|
35 |
-
"""
|
36 |
-
Generate a UUID (Universally Unique Identifier) and return it as a string.
|
37 |
|
38 |
-
Returns:
|
39 |
-
str: A UUID string.
|
40 |
-
"""
|
41 |
-
return str(uuid.uuid4())
|
42 |
|
43 |
def read_markdown_file(file_path: str) -> str:
|
44 |
"""
|
|
|
21 |
from langchain_community.embeddings.sentence_transformer import (
|
22 |
SentenceTransformerEmbeddings,
|
23 |
)
|
24 |
+
from innovation_pathfinder_ai.utils.utils import (
|
25 |
+
generate_uuid
|
26 |
+
)
|
27 |
|
|
|
28 |
import dotenv
|
29 |
import os
|
30 |
|
|
|
33 |
|
34 |
VECTOR_DATABASE_LOCATION = os.getenv("VECTOR_DATABASE_LOCATION")
|
35 |
|
|
|
|
|
|
|
36 |
|
|
|
|
|
|
|
|
|
37 |
|
38 |
def read_markdown_file(file_path: str) -> str:
|
39 |
"""
|