Update app.py
Browse files
app.py
CHANGED
@@ -142,8 +142,45 @@ def search_documents(query):
|
|
142 |
# Sortieren nach Relevanz
|
143 |
sorted_indices = similarities.argsort()[::-1]
|
144 |
results = [documents[i]['file'] for i in sorted_indices if similarities[i] > 0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
|
146 |
-
|
|
|
|
|
147 |
|
148 |
|
149 |
#######################################
|
|
|
142 |
# Sortieren nach Relevanz
|
143 |
sorted_indices = similarities.argsort()[::-1]
|
144 |
results = [documents[i]['file'] for i in sorted_indices if similarities[i] > 0]
|
145 |
+
results = []
|
146 |
+
relevant_text = ""
|
147 |
+
relevant_docs = {}
|
148 |
+
num_pages_per_doc = [len(doc['pages']) for doc in documents]
|
149 |
+
cumulative_pages = [sum(num_pages_per_doc[:i+1]) for i in range(len(num_pages_per_doc))]
|
150 |
+
|
151 |
+
for i in related_docs_indices:
|
152 |
+
if cosine_similarities[i] > 0:
|
153 |
+
doc_index = next(idx for idx, cumulative in enumerate(cumulative_pages) if i < cumulative)
|
154 |
+
page_index = i if doc_index == 0 else i - cumulative_pages[doc_index-1]
|
155 |
+
doc = documents[doc_index]
|
156 |
+
page = doc['pages'][page_index]
|
157 |
+
page_content = page['content']
|
158 |
+
header_content = page.get('header', '')
|
159 |
+
|
160 |
+
# Überprüfen, ob der Suchtext in der Überschrift oder im Seiteninhalt enthalten ist
|
161 |
+
index_in_content = page_content.lower().find(query.lower())
|
162 |
+
index_in_header = header_content.lower().find(query.lower())
|
163 |
+
|
164 |
+
if index_in_content != -1 or index_in_header != -1:
|
165 |
+
# Erstellen Sie einen Snippet für die Suchergebnisse
|
166 |
+
start = max(0, index_in_content - 400) if index_in_content != -1 else 0
|
167 |
+
end = min(len(page_content), index_in_content + 400) if index_in_content != -1 else len(page_content)
|
168 |
+
snippet = f"Aus <span class='doc-name'>{doc['file']}</span> (Seite <span class='page-number'>{page_index + 1}</span>):<br>"
|
169 |
+
|
170 |
+
# Fügen Sie die Überschrift hinzu, falls vorhanden
|
171 |
+
if header_content:
|
172 |
+
snippet += f"<b>Überschrift:</b> {header_content}<br>"
|
173 |
+
|
174 |
+
snippet += f"...{page_content[start:end]}...<br><br>"
|
175 |
+
relevant_text += snippet
|
176 |
+
|
177 |
+
if doc['file'] not in relevant_docs:
|
178 |
+
relevant_docs[doc['file']] = []
|
179 |
+
relevant_docs[doc['file']].append(snippet)
|
180 |
|
181 |
+
results = list(relevant_docs.keys())
|
182 |
+
return results, relevant_text
|
183 |
+
|
184 |
|
185 |
|
186 |
#######################################
|