Spaces:
No application file
No application file
Commit
·
fcac63a
1
Parent(s):
cff6e97
init files, idea
Browse files- .idea/.gitignore +3 -0
- .idea/inspectionProfiles/Project_Default.xml +21 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +4 -0
- .idea/modules.xml +8 -0
- .idea/reverse-RAG.iml +14 -0
- .idea/vcs.xml +6 -0
- app.py +3 -197
- ask_app.py +243 -0
- classify_app.py +197 -0
- faiss_utils.py +45 -0
.idea/.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
# Default ignored files
|
2 |
+
/shelf/
|
3 |
+
/workspace.xml
|
.idea/inspectionProfiles/Project_Default.xml
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<component name="InspectionProjectProfileManager">
|
2 |
+
<profile version="1.0">
|
3 |
+
<option name="myName" value="Project Default" />
|
4 |
+
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
5 |
+
<option name="ignoredPackages">
|
6 |
+
<value>
|
7 |
+
<list size="1">
|
8 |
+
<item index="0" class="java.lang.String" itemvalue="faiss" />
|
9 |
+
</list>
|
10 |
+
</value>
|
11 |
+
</option>
|
12 |
+
</inspection_tool>
|
13 |
+
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
14 |
+
<option name="ignoredErrors">
|
15 |
+
<list>
|
16 |
+
<option value="E265" />
|
17 |
+
</list>
|
18 |
+
</option>
|
19 |
+
</inspection_tool>
|
20 |
+
</profile>
|
21 |
+
</component>
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<component name="InspectionProjectProfileManager">
|
2 |
+
<settings>
|
3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
4 |
+
<version value="1.0" />
|
5 |
+
</settings>
|
6 |
+
</component>
|
.idea/misc.xml
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (reverse-RAG)" project-jdk-type="Python SDK" />
|
4 |
+
</project>
|
.idea/modules.xml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="ProjectModuleManager">
|
4 |
+
<modules>
|
5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/reverse-RAG.iml" filepath="$PROJECT_DIR$/.idea/reverse-RAG.iml" />
|
6 |
+
</modules>
|
7 |
+
</component>
|
8 |
+
</project>
|
.idea/reverse-RAG.iml
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<module type="PYTHON_MODULE" version="4">
|
3 |
+
<component name="NewModuleRootManager">
|
4 |
+
<content url="file://$MODULE_DIR$">
|
5 |
+
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
6 |
+
</content>
|
7 |
+
<orderEntry type="inheritedJdk" />
|
8 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
9 |
+
</component>
|
10 |
+
<component name="PyDocumentationSettings">
|
11 |
+
<option name="format" value="GOOGLE" />
|
12 |
+
<option name="myDocStringFormat" value="Google" />
|
13 |
+
</component>
|
14 |
+
</module>
|
.idea/vcs.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="VcsDirectoryMappings">
|
4 |
+
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
5 |
+
</component>
|
6 |
+
</project>
|
app.py
CHANGED
@@ -1,197 +1,3 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
from PyPDF2 import PdfReader
|
5 |
-
from openai import OpenAI
|
6 |
-
from langchain.chat_models import ChatOpenAI
|
7 |
-
|
8 |
-
ASK_ASH_PASSWORD = os.environ["ASK_ASH_PASSWORD"]
|
9 |
-
|
10 |
-
|
11 |
-
def gpt4_new(prompt_text):
|
12 |
-
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
|
13 |
-
response = client.chat.completions.create(
|
14 |
-
model="gpt-4",
|
15 |
-
messages=[{"role": "system",
|
16 |
-
"content": "Du bist eine Maschine, auf Grund des Texts von PDF-Dokumenten,"
|
17 |
-
"das Dokument in vorgegebene Kategorien klassifiziert."
|
18 |
-
"Du gibts möglichst kurze Antworten, am besten ein Wort"
|
19 |
-
"Du gibst keine Erklärungen oder Begründungen. "
|
20 |
-
"Du klassifizierst nur nach den vorgegebenen Kategorien."
|
21 |
-
"Wenn ein Dokument partout nicht klassifizierbar ist, "
|
22 |
-
"antwortest du mit '<no classification>'"},
|
23 |
-
{"role": "user", "content": prompt_text}])
|
24 |
-
return response.choices[0].message.content
|
25 |
-
|
26 |
-
|
27 |
-
# Define a function to ask a question to GPT-4
|
28 |
-
def ask_gpt4(question):
|
29 |
-
print(question) # we don't have to submit the question?
|
30 |
-
try:
|
31 |
-
# Use the chat function to send a message and get a response
|
32 |
-
response = ChatOpenAI()
|
33 |
-
# Extract the response text
|
34 |
-
return response["choices"][0]["message"]["content"]
|
35 |
-
except Exception as e:
|
36 |
-
# Handle exceptions that may occur during the API call
|
37 |
-
return str(e)
|
38 |
-
|
39 |
-
|
40 |
-
def process_prompts_and_save(my_prompts):
|
41 |
-
# Ensure the responses list is empty initially
|
42 |
-
responses = []
|
43 |
-
|
44 |
-
# Loop through each prompt in the list
|
45 |
-
for prompt in my_prompts:
|
46 |
-
try:
|
47 |
-
# ADD LOGIC TO READ FILE AND CLASSIFY
|
48 |
-
# Generate response for each prompt and append to the list
|
49 |
-
response = ask_gpt4(prompt)
|
50 |
-
sol = f"{prompt}\n\n{response}\n\n\n\n"
|
51 |
-
print(sol)
|
52 |
-
responses.append(sol)
|
53 |
-
except Exception as e:
|
54 |
-
# In case of an error, log the error with the prompt
|
55 |
-
responses.append(f"{prompt}\n\nError:{str(e)}\n\n\n\n")
|
56 |
-
|
57 |
-
# Writing all responses to a text file
|
58 |
-
with open('gpt4_responses.txt', 'w', encoding='utf-8') as file:
|
59 |
-
file.writelines(responses)
|
60 |
-
|
61 |
-
|
62 |
-
def get_pdfs_text(pdf_docs):
|
63 |
-
text = ""
|
64 |
-
for pdf in pdf_docs:
|
65 |
-
pdf_reader = PdfReader(pdf)
|
66 |
-
for page in pdf_reader.pages:
|
67 |
-
text += page.extract_text()
|
68 |
-
return text
|
69 |
-
|
70 |
-
|
71 |
-
def get_pdf_text(pdf_document):
|
72 |
-
text = ""
|
73 |
-
pdf_reader = PdfReader(pdf_document)
|
74 |
-
for page in pdf_reader.pages:
|
75 |
-
text += page.extract_text()
|
76 |
-
return text
|
77 |
-
|
78 |
-
|
79 |
-
def json_open(filename):
|
80 |
-
with open(filename, "r") as f:
|
81 |
-
mydata = f.read()
|
82 |
-
return mydata
|
83 |
-
|
84 |
-
|
85 |
-
def main():
|
86 |
-
st.title("Doc Classifier")
|
87 |
-
l, r = st.columns(2)
|
88 |
-
if st.toggle("show README"):
|
89 |
-
st.subheader("Funktion: ")
|
90 |
-
st.write("der Doc Classifier von Elia Wäfler kann einige der BIM2FM Dokumente")
|
91 |
-
st.write("des ASH nach Disziplin, Doc typ. und Geschoss (später KBOB) klassifizieren.")
|
92 |
-
st.write("lade ein oder mehrere PDF-Dokumente hoch, um es auszuprobieren.")
|
93 |
-
st.write("Feedback und Bugs gerne an elia.waefler@insel.ch")
|
94 |
-
st.write("Vielen Dank.")
|
95 |
-
st.write("")
|
96 |
-
with l:
|
97 |
-
st.subheader("Limitationen: ")
|
98 |
-
st.write("bisher nur PDFs")
|
99 |
-
st.write("nur Disziplin, Doc typ. und Geschoss")
|
100 |
-
st.write("macht teilweise Fehler, vor allem bei Koordination, Datennetz usw, (unklare Disziplinen)")
|
101 |
-
st.write("")
|
102 |
-
with r:
|
103 |
-
st.subheader("geplante Erweiterungen:")
|
104 |
-
st.write("Text Beschreibung wird von AI hinzugefügt")
|
105 |
-
st.write("jpg, bilder, tabellen, .xlsx, .docx alles möglich, nicht nur PDF/Text")
|
106 |
-
st.write("Ecodomus API einbinden, um alle Dokumente zu überprüfen.")
|
107 |
-
|
108 |
-
if st.text_input("ASK_ASH_PASSWORD: ", type="password") == ASK_ASH_PASSWORD:
|
109 |
-
uploaded_files = st.file_uploader("PDF Dokument", accept_multiple_files=True)
|
110 |
-
#print(uploaded_file)
|
111 |
-
#print(uploaded_file.name)
|
112 |
-
|
113 |
-
if st.button("classify KBOB!"):
|
114 |
-
if uploaded_files is not None:
|
115 |
-
with st.container():
|
116 |
-
# col1, col2, col3, col4, col5 = st.columns(5)
|
117 |
-
col1, col2, col3 = st.columns(3)
|
118 |
-
all_metadata = []
|
119 |
-
with col1:
|
120 |
-
st.write("Disziplin")
|
121 |
-
st.write(f"")
|
122 |
-
with col2:
|
123 |
-
st.write("Dokumententyp")
|
124 |
-
st.write(f"")
|
125 |
-
with col3:
|
126 |
-
st.write("Geschoss")
|
127 |
-
st.write(f"")
|
128 |
-
|
129 |
-
for file in uploaded_files:
|
130 |
-
metadata = [file.name]
|
131 |
-
with col1:
|
132 |
-
with st.spinner("GPT4 at work"):
|
133 |
-
pdf_text = str(get_pdf_text(file))
|
134 |
-
prompt_1 = auftrag_0 + auftrag_1_disziplin + str(Baubranchen_Disziplinen) + pdf_text
|
135 |
-
answer_1 = gpt4_new(prompt_1)
|
136 |
-
print(prompt_1)
|
137 |
-
metadata.append(answer_1)
|
138 |
-
st.write(answer_1)
|
139 |
-
|
140 |
-
with col2:
|
141 |
-
with st.spinner("GPT4 at work"):
|
142 |
-
prompt_2 = auftrag_0 + auftrag_1_type + str(Dokumententypen) + pdf_text
|
143 |
-
answer_2 = gpt4_new(prompt_2)
|
144 |
-
print(prompt_2)
|
145 |
-
metadata.append(answer_2)
|
146 |
-
|
147 |
-
st.write(answer_2)
|
148 |
-
|
149 |
-
with col3:
|
150 |
-
with st.spinner("GPT4 at work"):
|
151 |
-
prompt_3 = auftrag_0 + auftrag_1_ge + str(ASH_Geschosse) + pdf_text
|
152 |
-
answer_3 = gpt4_new(prompt_3)
|
153 |
-
print(prompt_3)
|
154 |
-
metadata.append(answer_2)
|
155 |
-
|
156 |
-
st.write(answer_3)
|
157 |
-
|
158 |
-
all_metadata.append(metadata)
|
159 |
-
|
160 |
-
metadata_filename = "ai_generated_metadata.txt"
|
161 |
-
with open(metadata_filename, 'w', encoding='utf-8') as f:
|
162 |
-
for line in all_metadata:
|
163 |
-
f.writelines("\n")
|
164 |
-
for item in line:
|
165 |
-
f.writelines(item)
|
166 |
-
f.writelines(";")
|
167 |
-
|
168 |
-
f.writelines("\n")
|
169 |
-
|
170 |
-
st.success("classified, saved")
|
171 |
-
st.download_button(f"Download Metadata", json_open(metadata_filename), file_name=metadata_filename)
|
172 |
-
else:
|
173 |
-
st.warning("no file")
|
174 |
-
|
175 |
-
|
176 |
-
if __name__ == "__main__":
|
177 |
-
#prompts = ["classify the document, tell me the ", "hello"]
|
178 |
-
#process_prompts_and_save(prompts)
|
179 |
-
auftrag_0 = "Klassifiziere dieses Dokument nach "
|
180 |
-
auftrag_1_disziplin = "diesen 'Baubranchen Disziplinen': "
|
181 |
-
auftrag_1_type = "diesen 'Dokumententypen': "
|
182 |
-
auftrag_1_ge = "diesen 'Geschossen': "
|
183 |
-
Baubranchen_Disziplinen = ['A-Architektur', 'B-Bauphysik', 'C-Rohrpostanlagen', 'D-Datennetz', 'E-Elektroanlagen',
|
184 |
-
'F-Fassadenplanung', 'G-Küche', 'H-Heizung', 'I-Innenausbau', 'K-Kälte', 'L-Lüftung',
|
185 |
-
'M-Medizintechnik', 'N-Fördertechnik', 'O-Gebäudebetrieb', 'P-Sprinkler',
|
186 |
-
'Q-Brandschutz', 'R-Koordination', 'S-Sanitär', 'T-Tragwerksplanung', 'W-Informatik',
|
187 |
-
'Z-Lichtplanung']
|
188 |
-
auftrag_2 = "gib nur den am besten passendsten Eintrag zurück. " \
|
189 |
-
"Keine weiteren Ausführungen oder Erklärungen. " \
|
190 |
-
"Antworte am besten in einem Wort. " \
|
191 |
-
"Hier der Dokumenteninhalt: "
|
192 |
-
Dokumententypen = ['Fotodokumentation', 'Projektdokumentation (PD)', 'Objektdokumentation (OD)',
|
193 |
-
'Prozessdokumentation', 'Fachdokumentation', 'Anlagedokumentation']
|
194 |
-
ASH_Geschosse = ['U4', 'U3', 'U2', 'U1',
|
195 |
-
'A', 'B', 'C', 'D', 'E', 'F', 'G']
|
196 |
-
#print(str(Baubranchen_Disziplinen))
|
197 |
-
main()
|
|
|
1 |
+
"""the idea is to embed all KBOB categories as vectores.
|
2 |
+
then when a new document in added, we do a sim search with the doc vector in the KBOB vectores
|
3 |
+
to map/classify. can be done in multiple steps. """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ask_app.py
ADDED
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
complete, functional RAG App
|
3 |
+
stores vectors in session state, or locally.
|
4 |
+
add function to display retrieved documents
|
5 |
+
"""
|
6 |
+
|
7 |
+
# import time
|
8 |
+
from datetime import datetime
|
9 |
+
# import openai
|
10 |
+
# import tiktoken
|
11 |
+
import streamlit as st
|
12 |
+
from PyPDF2 import PdfReader
|
13 |
+
from langchain.text_splitter import CharacterTextSplitter
|
14 |
+
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
|
15 |
+
from langchain.vectorstores import FAISS
|
16 |
+
from langchain.chat_models import ChatOpenAI
|
17 |
+
from langchain.memory import ConversationBufferMemory
|
18 |
+
from langchain.chains import ConversationalRetrievalChain
|
19 |
+
from html_templates import css, bot_template, user_template
|
20 |
+
from langchain.llms import HuggingFaceHub
|
21 |
+
import os
|
22 |
+
import numpy as np
|
23 |
+
import faiss_utils
|
24 |
+
from langchain_community.vectorstores import FAISS
|
25 |
+
from langchain.embeddings import OpenAIEmbeddings
|
26 |
+
|
27 |
+
|
28 |
+
def merge_faiss_indices(index1, index2):
    """
    Merge two FAISS indices into a new index, assuming both are of the same type and dimensionality.

    Args:
        index1 (faiss.Index): The first FAISS index.
        index2 (faiss.Index): The second FAISS index.

    Returns:
        faiss.Index: A new FAISS index containing all vectors from index1 and index2.

    NOTE(review): ``FAISS`` in this module is langchain's vectorstore wrapper
    (see the imports at the top of the file), not the ``faiss`` library.
    langchain's FAISS class has no ``IndexFlatL2``, ``IndexIVFFlat``,
    ``METRIC_L2`` or ``rev_swig_ptr`` attributes, so every branch below that
    reaches one of them should raise AttributeError — confirm and switch to
    ``import faiss`` before calling this function.
    """

    # Check if both indices are the same type
    if type(index1) != type(index2):
        raise ValueError("Indices are of different types")

    # Check dimensionality
    if index1.d != index2.d:
        raise ValueError("Indices have different dimensionality")

    # Determine type of indices
    if isinstance(index1, FAISS.IndexFlatL2):
        # Handle simple flat indices
        d = index1.d
        # Extract vectors from both indices
        # NOTE(review): raw faiss flat indices expose vectors via
        # reconstruct()/xb; `index.xb.data()` is SWIG-level access — verify
        # against the installed faiss version.
        xb1 = FAISS.rev_swig_ptr(index1.xb.data(), index1.ntotal * d)
        xb2 = FAISS.rev_swig_ptr(index2.xb.data(), index2.ntotal * d)

        # Combine vectors
        xb_combined = np.vstack((xb1, xb2))

        # Create a new index and add combined vectors
        new_index = FAISS.IndexFlatL2(d)
        new_index.add(xb_combined)
        return new_index

    elif isinstance(index1, FAISS.IndexIVFFlat):
        # Handle quantized indices (IndexIVFFlat)
        d = index1.d
        nlist = index1.nlist
        quantizer = FAISS.IndexFlatL2(d)  # Re-create the appropriate quantizer

        # Create a new index with the same configuration
        new_index = FAISS.IndexIVFFlat(quantizer, d, nlist, FAISS.METRIC_L2)

        # If the indices are already trained, you can directly add the vectors
        # Otherwise, you may need to train new_index using a representative subset of vectors
        vecs1 = FAISS.rev_swig_ptr(index1.xb.data(), index1.ntotal * d)
        vecs2 = FAISS.rev_swig_ptr(index2.xb.data(), index2.ntotal * d)
        new_index.add(vecs1)
        new_index.add(vecs2)
        return new_index

    else:
        raise TypeError("Index type not supported for merging in this function")
|
83 |
+
|
84 |
+
|
85 |
+
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of every uploaded PDF."""
    extracted = []
    for document in pdf_docs:
        for page in PdfReader(document).pages:
            extracted.append(page.extract_text())
    return "".join(extracted)
|
92 |
+
|
93 |
+
|
94 |
+
def get_text_chunks(text):
    """Split *text* into overlapping ~1000-character chunks on newline boundaries."""
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text)
|
103 |
+
|
104 |
+
|
105 |
+
def get_faiss_vectorstore(text_chunks):
    """Embed *text_chunks* into a FAISS vectorstore.

    Uses OpenAI embeddings when the ``sst.openai`` toggle is set, otherwise a
    local Instructor-XL model.
    """
    embedding_model = (
        OpenAIEmbeddings()
        if sst.openai
        else HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
    )
    return FAISS.from_texts(texts=text_chunks, embedding=embedding_model)
|
112 |
+
|
113 |
+
|
114 |
+
def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain over *vectorstore*.

    Chooses ChatOpenAI or a HuggingFace-hosted flan-t5-xxl depending on the
    ``sst.openai`` toggle, and attaches a buffer memory keyed 'chat_history'.
    """
    if sst.openai:
        llm = ChatOpenAI()
    else:
        llm = HuggingFaceHub(
            repo_id="google/flan-t5-xxl",
            model_kwargs={"temperature": 0.5, "max_length": 512},
        )
    chat_memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=chat_memory,
    )
|
128 |
+
|
129 |
+
|
130 |
+
def handle_userinput(user_question):
    """Run *user_question* through the conversation chain and render the full chat history."""
    result = sst.conversation({'question': user_question})
    sst.chat_history = result['chat_history']

    for idx, message in enumerate(sst.chat_history):
        if idx % 2:
            # Odd positions: AI responses
            print(message)
            st.write(bot_template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
            # Show source document information when the message carries it
            if hasattr(message, 'source') and message.source:
                st.write(f"Source Document: {message.source}", unsafe_allow_html=True)
        else:
            # Even positions: user messages
            st.write(user_template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
|
145 |
+
|
146 |
+
|
147 |
+
# Module-level configuration: endpoints and credentials pulled from the
# environment at import time (KeyError if any is missing).
# The original wrapped these in a redundant `if True:` block, which only
# added indentation; the assignments are unconditional either way.
BASE_URL = "https://api.vectara.io/v1"
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
OPENAI_ORG_ID = os.environ["OPENAI_ORG_ID"]
PINECONE_API_KEY = os.environ["PINECONE_API_KEY_LCBIM"]
HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]
VECTARA_API_KEY = os.environ["VECTARA_API_KEY"]
VECTARA_CUSTOMER_ID = os.environ["VECTARA_CUSTOMER_ID"]
headers = {"Authorization": f"Bearer {VECTARA_API_KEY}", "Content-Type": "application/json"}
|
156 |
+
|
157 |
+
|
158 |
+
def main():
    """Streamlit entry point for the ASH RAG assistant.

    Initializes session-state defaults, defines the widget callbacks for
    query submission and vectorstore save/load, and renders the password-
    gated chat UI plus the document-processing sidebar.
    """
    st.set_page_config(page_title="Anna Seiler Haus KI-Assistent", page_icon=":hospital:")
    st.write(css, unsafe_allow_html=True)
    # Seed session-state keys on first run so later reads never KeyError.
    if "conversation" not in sst:
        sst.conversation = None
    if "chat_history" not in sst:
        sst.chat_history = None
    if "page" not in sst:
        sst.page = "home"
    if "openai" not in sst:
        sst.openai = True
    if "login" not in sst:
        sst.login = False
    if 'submitted_user_query' not in sst:
        sst.submitted_user_query = ''
    if 'submitted_user_safe' not in sst:
        sst.submitted_user_safe = ''
    if 'submitted_user_load' not in sst:
        sst.submitted_user_load = ''

    def submit_user_query():
        # on_change callback: move the widget value into session state and
        # clear the input box so the question is processed exactly once.
        sst.submitted_user_query = sst.widget_user_query
        sst.widget_user_query = ''

    def submit_user_safe():
        # on_change callback for the "save embeddings" path input; saving is
        # a side effect of submitting the path.
        sst.submitted_user_safe = sst.widget_user_safe
        sst.widget_user_safe = ''
        if "vectorstore" in sst:
            # faiss_name = str(datetime.now().strftime("%Y%m%d%H%M%S")) + "faiss_index"
            faiss_utils.save_local(sst.vectorstore, path=sst.submitted_user_safe)
            st.sidebar.success("saved")
        else:
            st.sidebar.warning("No embeddings to save. Please process documents first.")

    def submit_user_load():
        # on_change callback for the "load embeddings" path input; merges into
        # the existing vectorstore when one is already in session state.
        sst.submitted_user_load = sst.widget_user_load
        sst.widget_user_load = ''
        if os.path.exists(sst.submitted_user_load):
            new_db = faiss_utils.load_vectorstore(f"{sst.submitted_user_load}/faiss_index.index")
            if "vectorstore" in sst:
                if new_db is not None:  # Check if this is working
                    sst.vectorstore.merge_from(new_db)
                    sst.conversation = get_conversation_chain(sst.vectorstore)
                    st.sidebar.success("faiss loaded")
            else:
                if new_db is not None:  # Check if this is working
                    sst.vectorstore = new_db
                    sst.conversation = get_conversation_chain(new_db)
                    st.sidebar.success("faiss loaded")
        else:
            st.sidebar.warning("Couldn't load/find embeddings")

    st.header("Anna Seiler Haus KI-Assistent ASH :hospital:")
    # Plain-text password gate; re-evaluated on every rerun.
    if st.text_input("ASK_ASH_PASSWORD: ", type="password") == ASK_ASH_PASSWORD:

        #user_question = st.text_input("Ask a question about your documents:", key="user_query", on_change=handle_query)
        st.text_input('Ask a question about your documents:', key='widget_user_query', on_change=submit_user_query)
        #sst.openai = st.toggle(label="use openai?")

        if sst.submitted_user_query:
            if "vectorstore" in sst:
                handle_userinput(sst.submitted_user_query)
            else:
                st.warning("no vectorstore loaded.")

        with st.sidebar:
            st.subheader("Your documents")
            pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
            if st.button("Process"):
                with st.spinner("Processing"):
                    vec = get_faiss_vectorstore(get_text_chunks(get_pdf_text(pdf_docs)))
                    sst.vectorstore = vec
                    sst.conversation = get_conversation_chain(vec)
                    st.success("embedding complete")

            st.text_input('Safe Embeddings to: (copy path of folder)', key='widget_user_safe',
                          on_change=submit_user_safe)

            st.text_input('Load Embeddings from: (copy path of folder)', key='widget_user_load',
                          on_change=submit_user_load)
|
238 |
+
|
239 |
+
|
240 |
+
if __name__ == '__main__':
    # `sst` is the module-wide alias for Streamlit's session state; the
    # functions above reference it as a global, so it must be bound before
    # main() runs.
    sst = st.session_state
    ASK_ASH_PASSWORD = os.environ["ASK_ASH_PASSWORD"]  # UI gate; KeyError if unset
    main()
|
classify_app.py
ADDED
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os
|
3 |
+
# import openai
|
4 |
+
from PyPDF2 import PdfReader
|
5 |
+
from openai import OpenAI
|
6 |
+
from langchain.chat_models import ChatOpenAI
|
7 |
+
|
8 |
+
ASK_ASH_PASSWORD = os.environ["ASK_ASH_PASSWORD"]
|
9 |
+
|
10 |
+
|
11 |
+
def gpt4_new(prompt_text):
    """Send *prompt_text* to GPT-4 under a fixed German classification system
    prompt and return the model's (intentionally terse) answer text."""
    system_prompt = ("Du bist eine Maschine, auf Grund des Texts von PDF-Dokumenten,"
                     "das Dokument in vorgegebene Kategorien klassifiziert."
                     "Du gibts möglichst kurze Antworten, am besten ein Wort"
                     "Du gibst keine Erklärungen oder Begründungen. "
                     "Du klassifizierst nur nach den vorgegebenen Kategorien."
                     "Wenn ein Dokument partout nicht klassifizierbar ist, "
                     "antwortest du mit '<no classification>'")
    api_client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
    completion = api_client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt_text},
        ],
    )
    return completion.choices[0].message.content
|
25 |
+
|
26 |
+
|
27 |
+
# Define a function to ask a question to GPT-4
def ask_gpt4(question):
    """Send *question* to the chat model and return the reply text.

    On any failure, returns the exception message as a string (the original
    best-effort contract is kept).
    """
    print(question)  # log the outgoing question for debugging
    try:
        # BUG FIX: the original constructed `ChatOpenAI()` but never sent the
        # question, then subscripted the model object — which always raised,
        # so the function returned an error string instead of an answer.
        # Actually invoke the model with the question.
        llm = ChatOpenAI()
        return llm.predict(question)
    except Exception as e:
        # Handle exceptions that may occur during the API call
        return str(e)
|
38 |
+
|
39 |
+
|
40 |
+
def process_prompts_and_save(my_prompts):
    """Query the model once per prompt and write every prompt/response pair
    (or prompt/error pair) to 'gpt4_responses.txt'."""
    collected = []
    for current_prompt in my_prompts:
        try:
            # ADD LOGIC TO READ FILE AND CLASSIFY
            answer = ask_gpt4(current_prompt)
            entry = f"{current_prompt}\n\n{answer}\n\n\n\n"
            print(entry)
            collected.append(entry)
        except Exception as err:
            # Record the failure alongside the prompt instead of aborting.
            collected.append(f"{current_prompt}\n\nError:{str(err)}\n\n\n\n")
    with open('gpt4_responses.txt', 'w', encoding='utf-8') as out_file:
        out_file.writelines(collected)
|
60 |
+
|
61 |
+
|
62 |
+
def get_pdfs_text(pdf_docs):
    """Concatenate the extracted text of every page of every PDF in *pdf_docs*."""
    pages = []
    for document in pdf_docs:
        reader = PdfReader(document)
        pages.extend(page.extract_text() for page in reader.pages)
    return "".join(pages)
|
69 |
+
|
70 |
+
|
71 |
+
def get_pdf_text(pdf_document):
    """Extract and concatenate the text of all pages of one PDF."""
    reader = PdfReader(pdf_document)
    return "".join(page.extract_text() for page in reader.pages)
|
77 |
+
|
78 |
+
|
79 |
+
def json_open(filename):
    """Read *filename* and return its full contents as a string.

    Opens with an explicit UTF-8 encoding: the metadata file this reads is
    written with encoding='utf-8' elsewhere in this module, and relying on
    the platform default (e.g. cp1252 on Windows) can corrupt umlauts.

    NOTE: despite the name, this does not parse JSON — it returns raw text.
    The name is kept for interface stability.
    """
    with open(filename, "r", encoding="utf-8") as f:
        return f.read()
|
83 |
+
|
84 |
+
|
85 |
+
def main():
    """Streamlit entry point for the Doc Classifier.

    Renders an optional README, then a password-gated uploader that sends each
    PDF's text to GPT-4 three times (Disziplin / Dokumententyp / Geschoss) and
    writes the collected answers to a semicolon-separated metadata file.
    """
    st.title("Doc Classifier")
    l, r = st.columns(2)
    if st.toggle("show README"):
        st.subheader("Funktion: ")
        st.write("der Doc Classifier von Elia Wäfler kann einige der BIM2FM Dokumente")
        st.write("des ASH nach Disziplin, Doc typ. und Geschoss (später KBOB) klassifizieren.")
        st.write("lade ein oder mehrere PDF-Dokumente hoch, um es auszuprobieren.")
        st.write("Feedback und Bugs gerne an elia.waefler@insel.ch")
        st.write("Vielen Dank.")
        st.write("")
        with l:
            st.subheader("Limitationen: ")
            st.write("bisher nur PDFs")
            st.write("nur Disziplin, Doc typ. und Geschoss")
            st.write("macht teilweise Fehler, vor allem bei Koordination, Datennetz usw, (unklare Disziplinen)")
            st.write("")
        with r:
            st.subheader("geplante Erweiterungen:")
            st.write("Text Beschreibung wird von AI hinzugefügt")
            st.write("jpg, bilder, tabellen, .xlsx, .docx alles möglich, nicht nur PDF/Text")
            st.write("Ecodomus API einbinden, um alle Dokumente zu überprüfen.")

    # Plain-text password gate; compared against the module-level env value.
    if st.text_input("ASK_ASH_PASSWORD: ", type="password") == ASK_ASH_PASSWORD:
        uploaded_files = st.file_uploader("PDF Dokument", accept_multiple_files=True)
        #print(uploaded_file)
        #print(uploaded_file.name)

        if st.button("classify KBOB!"):
            if uploaded_files is not None:
                with st.container():
                    # col1, col2, col3, col4, col5 = st.columns(5)
                    col1, col2, col3 = st.columns(3)
                    all_metadata = []
                    with col1:
                        st.write("Disziplin")
                        st.write(f"")
                    with col2:
                        st.write("Dokumententyp")
                        st.write(f"")
                    with col3:
                        st.write("Geschoss")
                        st.write(f"")

                    for file in uploaded_files:
                        # One metadata row per file: [name, disziplin, typ, geschoss]
                        metadata = [file.name]
                        with col1:
                            with st.spinner("GPT4 at work"):
                                pdf_text = str(get_pdf_text(file))
                                prompt_1 = auftrag_0 + auftrag_1_disziplin + str(Baubranchen_Disziplinen) + pdf_text
                                answer_1 = gpt4_new(prompt_1)
                                print(prompt_1)
                                metadata.append(answer_1)
                                st.write(answer_1)

                        with col2:
                            with st.spinner("GPT4 at work"):
                                prompt_2 = auftrag_0 + auftrag_1_type + str(Dokumententypen) + pdf_text
                                answer_2 = gpt4_new(prompt_2)
                                print(prompt_2)
                                metadata.append(answer_2)

                                st.write(answer_2)

                        with col3:
                            with st.spinner("GPT4 at work"):
                                prompt_3 = auftrag_0 + auftrag_1_ge + str(ASH_Geschosse) + pdf_text
                                answer_3 = gpt4_new(prompt_3)
                                print(prompt_3)
                                # BUG FIX: the original appended answer_2 here a
                                # second time, so the Geschoss answer was computed
                                # and displayed but never stored in the metadata.
                                metadata.append(answer_3)

                                st.write(answer_3)

                        all_metadata.append(metadata)

                    # Persist all rows as "\n"-prefixed, ";"-separated records.
                    metadata_filename = "ai_generated_metadata.txt"
                    with open(metadata_filename, 'w', encoding='utf-8') as f:
                        for line in all_metadata:
                            f.writelines("\n")
                            for item in line:
                                f.writelines(item)
                                f.writelines(";")
                        f.writelines("\n")

                    st.success("classified, saved")
                    st.download_button(f"Download Metadata", json_open(metadata_filename), file_name=metadata_filename)
            else:
                st.warning("no file")
|
174 |
+
|
175 |
+
|
176 |
+
if __name__ == "__main__":
    #prompts = ["classify the document, tell me the ", "hello"]
    #process_prompts_and_save(prompts)
    # Prompt fragments and category lists, consumed as globals by main().
    auftrag_0 = "Klassifiziere dieses Dokument nach "
    auftrag_1_disziplin = "diesen 'Baubranchen Disziplinen': "
    auftrag_1_type = "diesen 'Dokumententypen': "
    auftrag_1_ge = "diesen 'Geschossen': "
    # Allowed discipline labels (letter prefix = ASH discipline code).
    Baubranchen_Disziplinen = ['A-Architektur', 'B-Bauphysik', 'C-Rohrpostanlagen', 'D-Datennetz', 'E-Elektroanlagen',
                               'F-Fassadenplanung', 'G-Küche', 'H-Heizung', 'I-Innenausbau', 'K-Kälte', 'L-Lüftung',
                               'M-Medizintechnik', 'N-Fördertechnik', 'O-Gebäudebetrieb', 'P-Sprinkler',
                               'Q-Brandschutz', 'R-Koordination', 'S-Sanitär', 'T-Tragwerksplanung', 'W-Informatik',
                               'Z-Lichtplanung']
    # NOTE(review): auftrag_2 is defined but not referenced by main() in this
    # file — presumably a prompt suffix that was meant to be appended; confirm.
    auftrag_2 = "gib nur den am besten passendsten Eintrag zurück. " \
                "Keine weiteren Ausführungen oder Erklärungen. " \
                "Antworte am besten in einem Wort. " \
                "Hier der Dokumenteninhalt: "
    Dokumententypen = ['Fotodokumentation', 'Projektdokumentation (PD)', 'Objektdokumentation (OD)',
                       'Prozessdokumentation', 'Fachdokumentation', 'Anlagedokumentation']
    ASH_Geschosse = ['U4', 'U3', 'U2', 'U1',
                     'A', 'B', 'C', 'D', 'E', 'F', 'G']
    #print(str(Baubranchen_Disziplinen))
    main()
|
faiss_utils.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from langchain_community.vectorstores import FAISS
|
3 |
+
from langchain.embeddings import OpenAIEmbeddings
|
4 |
+
|
5 |
+
|
6 |
+
def embed(input_strings):
    """Build a FAISS vectorstore from *input_strings* using OpenAI embeddings."""
    embedding_model = OpenAIEmbeddings()
    return FAISS.from_texts(texts=input_strings, embedding=embedding_model)
|
9 |
+
|
10 |
+
|
11 |
+
# Function to save a FAISS vectorstore to a specified path
def save_local(vectorstore, path="safe/"):
    """Persist *vectorstore* under ``path`` as ``faiss_index.index``.

    Args:
        vectorstore: Any object exposing ``save_local(file_path)`` — in
            practice a langchain FAISS vectorstore.
        path: Target directory; created (including parents) if missing.
    """
    # exist_ok avoids the exists()/makedirs() check-then-act race of the
    # original and also creates intermediate directories.
    os.makedirs(path, exist_ok=True)
    file_path = os.path.join(path, "faiss_index.index")
    vectorstore.save_local(file_path)
    print(f"FAISS vectorstore saved to {file_path}")
|
18 |
+
|
19 |
+
|
20 |
+
# Function to load a FAISS vectorstore from a specified path
def load_vectorstore(path):
    """Load a previously saved FAISS vectorstore from *path* and return it."""
    embedding_model = OpenAIEmbeddings()  # needed to initialize the FAISS properly
    store = FAISS.load_local(path, embedding_model, allow_dangerous_deserialization=True)
    print(f"FAISS vectorstore loaded from {path}")
    return store
|
26 |
+
|
27 |
+
|
28 |
+
# Example usage
if __name__ == "__main__":
    # Smoke test — requires OPENAI_API_KEY and network access, since embed()
    # calls the OpenAI embeddings endpoint.
    # Embed a few words
    words = ["hello", "world", "sample", "text"]
    faiss_db1 = embed(words)

    # Save the vectorstore
    save_local(faiss_db1)

    # Load the vectorstore
    # NOTE(review): save_local() writes "safe/faiss_index.index" and this
    # passes that same file path to FAISS.load_local — confirm the path
    # convention (folder vs. index file) matches what load_local expects.
    loaded_db1 = load_vectorstore("safe/faiss_index.index")

    # Embed another set of words and create a second vectorstore
    more_words = ["FAISS", "database", "information", "retrieval"]
    faiss_db2 = embed(more_words)

    loaded_db1.merge_from(faiss_db2)
    print("Merged vectorstore with other vectorstore containing total vectors:", loaded_db1.index.ntotal)