alexkueck committed
Commit f3bc17e
1 Parent(s): e658dac

Update utils.py

Files changed (1)
  1. utils.py +19 -4
utils.py CHANGED
@@ -320,18 +320,20 @@ def rag_chain(llm, prompt, retriever):
     relevant_docs=[]
     most_relevant_docs=[]
     relevant_docs = retriever.get_relevant_documents(prompt)
+    extracted_docs = extract_document_info(relevant_docs)
+
 
     print("relevant docs1......................")
-    if (len(relevant_docs)>0):
+    if (len(extracted_docs)>0):
         print("relevant docs2......................")
-        print(relevant_docs)
+        print(extracted_docs)
         #llm_chain = LLMChain(llm = llm, prompt = RAG_CHAIN_PROMPT)
         #result = llm_chain.run({"context": relevant_docs, "question": prompt})
         # Build a PromptTemplate with placeholders for context and question
         #RAG_CHAIN_PROMPT = PromptTemplate(template="Context: {context}\n\nQuestion: {question}\n\nAnswer:")
 
         # Retrieve the contents of the relevant documents
-        doc_contents = [doc["content"] for doc in relevant_docs]
+        doc_contents = [doc["content"] for doc in extracted_docs]
 
         # Compute the similarities and find the most relevant document
         question_embedding = embedder_modell.encode(prompt, convert_to_tensor=True)
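
Why this hunk swaps the lookup target: retriever.get_relevant_documents() returns LangChain Document objects, which are not subscriptable, so the old doc["content"] access failed; a Document exposes its text as the page_content attribute and its source info as the metadata dict. The commit therefore converts the Documents into plain dicts via the new extract_document_info() before indexing. A minimal sketch of the failure mode and the conversion (illustrative values; only the Document class itself is real LangChain API):

from langchain.schema import Document

doc = Document(page_content="some text", metadata={"page": 1, "source": "a.pdf"})
# doc["content"]  # TypeError: 'Document' object is not subscriptable
info = {"content": doc.page_content,      # the text lives in page_content
        "page": doc.metadata["page"],     # metadata is an ordinary dict
        "path": doc.metadata["source"]}
assert info["content"] == "some text"
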
@@ -340,7 +342,7 @@ def rag_chain(llm, prompt, retriever):
         most_relevant_doc_indices = similarity_scores.argsort(descending=True).squeeze().tolist()
 
         # Build a list of the most relevant documents
-        most_relevant_docs = [relevant_docs[i] for i in most_relevant_doc_indices]
+        most_relevant_docs = [extracted_docs[i] for i in most_relevant_doc_indices]
 
         # Combine the contents of all relevant documents
         combined_content = " ".join([doc["content"] for doc in most_relevant_docs])
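
Between these two hunks (context not shown in the diff), the document contents are embedded and scored against the question embedding, and similarity_scores is sorted in descending order to rank the documents. A sketch of that ranking step, assuming embedder_modell is a sentence-transformers SentenceTransformer and the scores are cosine similarities (the model name below is a placeholder, not necessarily what the repo loads):

from sentence_transformers import SentenceTransformer, util

embedder_modell = SentenceTransformer("all-MiniLM-L6-v2")  # placeholder model
doc_contents = ["first passage", "second passage"]         # stand-in contents

question_embedding = embedder_modell.encode("some question", convert_to_tensor=True)
content_embeddings = embedder_modell.encode(doc_contents, convert_to_tensor=True)
similarity_scores = util.cos_sim(question_embedding, content_embeddings)  # shape (1, n_docs)
most_relevant_doc_indices = similarity_scores.argsort(descending=True).squeeze().tolist()
# e.g. [1, 0] -- indices into doc_contents, best match first
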
@@ -370,6 +372,19 @@ def rag_chain(llm, prompt, retriever):
     return result
 
 
+# Function to build the list of dictionaries
+def extract_document_info(documents):
+    extracted_info = []
+    for doc in documents:
+        info = {
+            'content': doc.page_content,
+            'page': doc.metadata['page'],
+            'path': doc.metadata['source']
+        }
+        extracted_info.append(info)
+    return extracted_info
 
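
The new extract_document_info() assumes every retrieved Document carries 'page' and 'source' keys in its metadata, which PDF loaders typically set; for loaders that omit page numbers, doc.metadata.get('page') would avoid a KeyError (a defensive suggestion, not part of this commit). Usage sketch with hypothetical data:

from langchain.schema import Document

docs = [Document(page_content="RAG combines retrieval with generation.",
                 metadata={"page": 3, "source": "docs/example.pdf"})]
print(extract_document_info(docs))
# [{'content': 'RAG combines retrieval with generation.', 'page': 3, 'path': 'docs/example.pdf'}]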