Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -39,7 +39,7 @@ repo = Repository(
|
|
39 |
repo.git_pull() # Pull the latest changes (if any)
|
40 |
|
41 |
# Step 2: Load the PDF File
|
42 |
-
pdf_path = "Private_Book/
|
43 |
|
44 |
# Step 2: Load the PDF File
|
45 |
pdf_path2 = "Private_Book/Deutsche_Kodierrichtlinien_23.pdf" # Replace with your PDF file path
|
@@ -53,29 +53,32 @@ api_key = os.getenv("OPENAI_API_KEY")
|
|
53 |
# Updated caching mechanism using st.cache_data
|
54 |
@st.cache_data(persist="disk") # Using persist="disk" to save cache across sessions
|
55 |
def load_vector_store(file_path, store_name, force_reload=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
-
# Check if we need to force reload the vector store (e.g., when the PDF changes)
|
58 |
-
if force_reload or not os.path.exists(f"{store_name}.pkl"):
|
59 |
-
text_splitter = RecursiveCharacterTextSplitter(
|
60 |
-
chunk_size=1000,
|
61 |
-
chunk_overlap=200,
|
62 |
-
length_function=len
|
63 |
-
)
|
64 |
-
|
65 |
-
text = load_pdf_text(file_path)
|
66 |
-
chunks = text_splitter.split_text(text=text)
|
67 |
-
|
68 |
-
embeddings = OpenAIEmbeddings()
|
69 |
-
VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
|
70 |
-
VectorStore.save_local("faiss_store")
|
71 |
-
FAISS.load_local("faiss_store", OpenAIEmbeddings())
|
72 |
-
with open(f"{store_name}.pkl", "wb") as f:
|
73 |
-
pickle.dump(VectorStore, f)
|
74 |
-
else:
|
75 |
-
with open(f"{store_name}.pkl", "rb") as f:
|
76 |
-
VectorStore = pickle.load(f)
|
77 |
-
|
78 |
-
return VectorStore
|
79 |
|
80 |
# Utility function to load text from a PDF
|
81 |
def load_pdf_text(file_path):
|
@@ -119,6 +122,17 @@ def handle_no_answer(response):
|
|
119 |
"i cannot answer that",
|
120 |
"unable to provide an answer",
|
121 |
"not enough context",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
]
|
123 |
|
124 |
alternative_responses = [
|
@@ -184,12 +198,12 @@ def page1():
|
|
184 |
col1, col2 = st.columns(2)
|
185 |
|
186 |
with col1:
|
187 |
-
if st.button("
|
188 |
-
query = "
|
189 |
-
if st.button("
|
190 |
-
query = "
|
191 |
-
if st.button("
|
192 |
-
query = "
|
193 |
|
194 |
|
195 |
with col2:
|
|
|
39 |
repo.git_pull() # Pull the latest changes (if any)
|
40 |
|
41 |
# Step 2: Load the PDF File
|
42 |
+
pdf_path = "Private_Book/09012024_Kombi_2.pdf" # Replace with your PDF file path
|
43 |
|
44 |
# Step 2 (cont.): Load the second PDF file
|
45 |
pdf_path2 = "Private_Book/Deutsche_Kodierrichtlinien_23.pdf" # Replace with your PDF file path
|
|
|
53 |
# Updated caching mechanism using st.cache_data
|
54 |
@st.cache_data(persist="disk")  # Using persist="disk" to save cache across sessions
def load_vector_store(file_path, store_name, force_reload=False):
    """Return the FAISS vector store for a PDF, building it when necessary.

    A pickled store at ``{store_name}.pkl`` is reused unless ``force_reload``
    is true, the file does not exist, or the file cannot be unpickled. In
    those cases the PDF text is split into chunks, embedded, indexed with
    FAISS, and written back to the pickle file. The pickle is then added,
    committed, and pushed through the module-level ``repo``.

    Args:
        file_path: Path of the PDF handed to ``load_pdf_text``.
        store_name: Base name of the pickle file (``.pkl`` is appended).
        force_reload: When true, ignore an existing pickle and rebuild.

    Returns:
        The loaded or newly built vector store.
    """
    # Function-scope import: the file's import block is not visible from here.
    import logging

    log = logging.getLogger(__name__)
    vector_store_path = f"{store_name}.pkl"

    # Reuse the serialized store when allowed.
    if not force_reload and os.path.exists(vector_store_path):
        # NOTE(security): pickle.load can execute arbitrary code. Only load
        # pickle files that this application wrote itself.
        try:
            with open(vector_store_path, "rb") as f:
                return pickle.load(f)
        except (
            pickle.UnpicklingError,
            EOFError,
            AttributeError,
            ImportError,
            IndexError,
        ) as exc:
            # A truncated or stale pickle should not take the app down;
            # fall through and rebuild it from the PDF instead.
            log.warning(
                "Could not load %s (%s); rebuilding the vector store.",
                vector_store_path,
                exc,
            )

    # Load and process the PDF, then create the vector store.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    text = load_pdf_text(file_path)
    chunks = text_splitter.split_text(text=text)
    embeddings = OpenAIEmbeddings()
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)

    # Serialize the vector store so later runs can skip the embedding step.
    with open(vector_store_path, "wb") as f:
        pickle.dump(vector_store, f)

    # Commit and push the pickle. This is best-effort: the store is already
    # built and saved locally, so a git failure (no credentials, offline,
    # "nothing to commit" after an identical rebuild) must not discard it.
    # NOTE(review): presumably `repo` is a huggingface_hub Repository, whose
    # git helpers raise EnvironmentError (an alias of OSError) -- confirm.
    try:
        repo.git_add(vector_store_path)
        repo.git_commit(f"Update vector store: {store_name}")
        repo.git_push()
    except OSError as exc:
        log.warning(
            "Could not push %s to the repository: %s", vector_store_path, exc
        )

    return vector_store
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
# Utility function to load text from a PDF
|
84 |
def load_pdf_text(file_path):
|
|
|
122 |
"i cannot answer that",
|
123 |
"unable to provide an answer",
|
124 |
"not enough context",
|
125 |
+
"Sorry, I do not have enough information",
|
126 |
+
"I do not have enough information",
|
127 |
+
"I don't have enough information",
|
128 |
+
"Sorry, I don't have enough context to answer that question.",
|
129 |
+
"I don't have enough context to answer that question.",
|
130 |
+
"to answer that question.",
|
131 |
+
"Sorry",
|
132 |
+
"I'm sorry",
|
133 |
+
"I don't understand the question",
|
134 |
+
"I don't understand"
|
135 |
+
|
136 |
]
|
137 |
|
138 |
alternative_responses = [
|
|
|
198 |
col1, col2 = st.columns(2)
|
199 |
|
200 |
with col1:
|
201 |
+
if st.button("Welche Geräte müssen für die LG Geriatrie vorgehalten werden? "):
|
202 |
+
query = "Welche Geräte müssen für die LG Geriatrie vorgehalten werden? "
|
203 |
+
if st.button("Welche ärztlichen Vorgaben gibt es für die LG Palliativmedizin?"):
|
204 |
+
query = "Welche ärztlichen Vorgaben gibt es für die LG Palliativmedizin?"
|
205 |
+
if st.button("Wie haben sich die DiGA in den letzten Jahren entwickelt? Kannst du mir Daten nennen?"):
|
206 |
+
query = "Wie haben sich die DiGA in den letzten Jahren entwickelt? Kannst du mir Daten nennen?"
|
207 |
|
208 |
|
209 |
with col2:
|