alexkueck committed
Commit 3685ab4
Parent: b680eb6

Update utils.py

Files changed (1)
utils.py +0 -75
utils.py CHANGED
@@ -267,34 +267,6 @@ def load_word_with_metadata(file_path):
     return documents
 
 
-"""
-# Custom loader functions
-def load_pdf_with_metadata(file_path):
-    document = fitz.open(file_path)
-    documents = []
-    for page_num in range(len(document)):
-        page = document.load_page(page_num)
-        content = page.get_text("text")
-        metadata = {
-            "title": document.metadata.get("title", "Unbekannt"),
-            "page": page_num + 1,
-            "path": file_path
-        }
-        documents.append({"page_content": content, "metadata": metadata})
-    return documents
-
-def load_word_with_metadata(file_path):
-    document = docx.Document(file_path)
-    metadata = {
-        "title": "Dokument",
-        "path": file_path
-    }
-    contents = []
-    for para in document.paragraphs:
-        content = para.text
-        contents.append({"page_content": content, "metadata": {**metadata, "page": 1}})
-    return contents
-"""
 
 
 ################################################
@@ -362,34 +334,7 @@ def document_retrieval_chroma(llm, prompt):
     return db
 
 
-############################################
-# rag_chain alternative for RAG with image upload, since the llm cannot be used as-is here and the prompt with the RAG extensions is passed differently
-# use langchain to route the prompt to the llm, but first search the vector DB to add matching splits to the prompt
-# prompt with RAG!!!
-"""
-def rag_chainback(prompt, db, k=3):
-    rag_template = "Nutze ausschließlich die folgenden Kontext Teile am Ende, um die Frage zu beantworten . " + template + "Frage: " + prompt + "Kontext Teile: "
-    retrieved_chunks = db.similarity_search(prompt, k)
-
-    # Create a dictionary for the chunks
-    chunks_dict = []
-    for i, chunk in enumerate(retrieved_chunks):
-        chunk_dict = {
-            "chunk_index": i + 1,
-            "page_content": chunk.page_content, # assuming chunk has page_content attribute
-            "metadata": chunk.metadata # assuming chunk has metadata attribute
-        }
-        chunks_dict.append(chunk_dict)
-
-    # Build the new prompt
-    neu_prompt = rag_template
-    for chunk in chunks_dict:
-        neu_prompt += f"{chunk['chunk_index']}. {chunk['page_content']}\n"
 
-    print("dict.............................."+ json.dumps(chunks_dict, indent=4, ensure_ascii=False))
-
-    return neu_prompt, chunks_dict # returning both the new prompt and the dictionary
-"""
 
 ###############################################
 # Create a langchain for RAG chaining
@@ -414,10 +359,7 @@ def rag_chain(llm, prompt, retriever):
     relevant_docs = retriever.get_relevant_documents(prompt)
     extracted_docs = extract_document_info(relevant_docs)
 
-
-    print("releant docs1......................")
     if (len(extracted_docs)>0):
-        print("releant docs2......................")
         print(extracted_docs)
         #llm_chain = LLMChain(llm = llm, prompt = RAG_CHAIN_PROMPT)
         #result = llm_chain.run({"context": relevant_docs, "question": prompt})
@@ -476,23 +418,6 @@ def extract_document_info(documents):
         }
         extracted_info.append(info)
     return extracted_info
-
-
-"""
-# Function to build the list of dictionaries
-def extract_document_info(documents):
-    extracted_info = []
-    for doc in documents:
-        info = {
-            'content' : doc["content"],
-            'metadaten' : doc["metadata"],
-            'titel' : metadaten.get("title", "Keine Überschrift"),
-            'seite' : metadaten.get("page", "Unbekannte Seite"),
-            'pfad' : metadaten.get("path", "Kein Pfad verfügbar")
-        }
-        extracted_info.append(info)
-    return extracted_info
-"""
 
 
 
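The hunk contexts above leave document_retrieval_chroma(llm, prompt) and rag_chain(llm, prompt, retriever) as the single retrieval path in utils.py, with rag_chain calling retriever.get_relevant_documents(prompt). A minimal sketch of how such a path is typically wired with LangChain's Chroma wrapper; the embedding model, persist directory, and sample query are illustrative assumptions, not code from this commit:

# Illustrative sketch only - not part of this commit. The raw Chroma store
# (what document_retrieval_chroma returns as db) must be adapted with
# as_retriever() before rag_chain can call get_relevant_documents on it.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# Assumed embedding model and persist directory for the sketch.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = Chroma(persist_directory="./chroma", embedding_function=embeddings)

# Top-3 chunks, mirroring the k=3 default of the removed rag_chainback.
retriever = db.as_retriever(search_kwargs={"k": 3})
docs = retriever.get_relevant_documents("Wie funktioniert RAG?")
for doc in docs:
    # The title/page/path metadata keys match the loaders kept in utils.py.
    print(doc.metadata.get("title", "Unbekannt"), doc.metadata.get("page", 1), doc.metadata.get("path", ""))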