alexkueck committed on
Commit
7fc9240
·
verified ·
1 Parent(s): 0ceaa7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -1
app.py CHANGED
@@ -142,8 +142,45 @@ def search_documents(query):
142
  # Sortieren nach Relevanz
143
  sorted_indices = similarities.argsort()[::-1]
144
  results = [documents[i]['file'] for i in sorted_indices if similarities[i] > 0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
- return results if results else ["No relevant documents found"]
 
 
147
 
148
 
149
  #######################################
 
142
  # Sortieren nach Relevanz
143
  sorted_indices = similarities.argsort()[::-1]
144
  results = [documents[i]['file'] for i in sorted_indices if similarities[i] > 0]
145
+ results = []
146
+ relevant_text = ""
147
+ relevant_docs = {}
148
+ num_pages_per_doc = [len(doc['pages']) for doc in documents]
149
+ cumulative_pages = [sum(num_pages_per_doc[:i+1]) for i in range(len(num_pages_per_doc))]
150
+
151
+ for i in related_docs_indices:
152
+ if cosine_similarities[i] > 0:
153
+ doc_index = next(idx for idx, cumulative in enumerate(cumulative_pages) if i < cumulative)
154
+ page_index = i if doc_index == 0 else i - cumulative_pages[doc_index-1]
155
+ doc = documents[doc_index]
156
+ page = doc['pages'][page_index]
157
+ page_content = page['content']
158
+ header_content = page.get('header', '')
159
+
160
+ # Überprüfen, ob der Suchtext in der Überschrift oder im Seiteninhalt enthalten ist
161
+ index_in_content = page_content.lower().find(query.lower())
162
+ index_in_header = header_content.lower().find(query.lower())
163
+
164
+ if index_in_content != -1 or index_in_header != -1:
165
+ # Erstellen Sie einen Snippet für die Suchergebnisse
166
+ start = max(0, index_in_content - 400) if index_in_content != -1 else 0
167
+ end = min(len(page_content), index_in_content + 400) if index_in_content != -1 else len(page_content)
168
+ snippet = f"Aus <span class='doc-name'>{doc['file']}</span> (Seite <span class='page-number'>{page_index + 1}</span>):<br>"
169
+
170
+ # Fügen Sie die Überschrift hinzu, falls vorhanden
171
+ if header_content:
172
+ snippet += f"<b>Überschrift:</b> {header_content}<br>"
173
+
174
+ snippet += f"...{page_content[start:end]}...<br><br>"
175
+ relevant_text += snippet
176
+
177
+ if doc['file'] not in relevant_docs:
178
+ relevant_docs[doc['file']] = []
179
+ relevant_docs[doc['file']].append(snippet)
180
 
181
+ results = list(relevant_docs.keys())
182
+ return results, relevant_text
183
+
184
 
185
 
186
  #######################################