alexkueck committed on
Commit
4421aa0
1 Parent(s): cddacd5

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +2 -20
utils.py CHANGED
@@ -424,26 +424,8 @@ def rag_chain(llm, prompt, retriever):
424
  def query(api_llm, payload):
425
  response = requests.post(api_llm, headers=HEADERS, json=payload)
426
  return response.json()
427
-
428
 
429
 
430
- def extract_document_info(documents):
431
- extracted_info = []
432
- for doc in documents:
433
- # Extract the filename from the path to use as the title
434
- filename = os.path.basename(doc.metadata.get("path", ""))
435
- title = filename if filename else "Keine Überschrift"
436
-
437
- info = {
438
- 'content': doc.page_content,
439
- 'metadata': doc.metadata,
440
- 'titel': title,
441
- 'seite': doc.metadata.get("page", "Unbekannte Seite"),
442
- 'pfad': doc.metadata.get("path", "Kein Pfad verfügbar")
443
- }
444
- extracted_info.append(info)
445
- return extracted_info
446
-
447
 
448
 
449
  def extract_document_info(documents):
@@ -456,9 +438,9 @@ def extract_document_info(documents):
456
  # Determine the document type and adjust the path accordingly
457
  doc_path = doc.metadata.get("path", "")
458
  if doc_path.endswith('.pdf'):
459
- download_link = f"https://huggingface.co/spaces/alexkueck/SucheRAG/resolve/main/chroma/kkg/pdf/{title}?token=hf_token"
460
  elif doc_path.endswith('.docx'):
461
- download_link = f"https://huggingface.co/spaces/alexkueck/SucheRAG/resolve/main/chroma/kkg/word/{title}?token=hf_token"
462
  else:
463
  download_link = doc_path
464
 
 
424
  def query(api_llm, payload):
425
  response = requests.post(api_llm, headers=HEADERS, json=payload)
426
  return response.json()
 
427
 
428
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
 
430
 
431
  def extract_document_info(documents):
 
438
  # Determine the document type and adjust the path accordingly
439
  doc_path = doc.metadata.get("path", "")
440
  if doc_path.endswith('.pdf'):
441
+ download_link = f"https://huggingface.co/spaces/alexkueck/SucheRAG/resolve/main/chroma/kkg/pdf/{title}"
442
  elif doc_path.endswith('.docx'):
443
+ download_link = f"https://huggingface.co/spaces/alexkueck/SucheRAG/resolve/main/chroma/kkg/word/{title}"
444
  else:
445
  download_link = doc_path
446