alexkueck committed on
Commit
51f975f
1 Parent(s): d7e55c8

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +27 -13
utils.py CHANGED
@@ -793,14 +793,16 @@ def extract_document_info(documents):
793
  title = filename if filename else "Keine Überschrift"
794
 
795
  # Determine the document type and adjust the path accordingly
 
 
796
  doc_path = doc.metadata.get("path", "")
797
  if doc_path.endswith('.pdf'):
798
- download_link = f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/chroma/kkg/pdf/{title}"
799
  elif doc_path.endswith('.docx'):
800
  download_link = f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/chroma/kkg/word/{title}"
801
  else:
802
  download_link = doc_path
803
-
804
 
805
  info = {
806
  'content': doc.page_content,
@@ -879,23 +881,35 @@ def download_link(doc):
879
  file_url = f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/{doc}?token=hf_token"
880
  return f'<b><a href="{file_url}" target="_blank" style="color: #BB70FC; font-weight: bold;">{doc}</a></b>'
881
 
 
882
 
883
  def download_link(doc):
884
  # Basis-URL für das Hugging Face Repository
885
  base_url = f"https://huggingface.co/spaces/{STORAGE_REPO_ID}/resolve/main"
886
 
887
- # Check if doc is a dictionary and contains the key 'pfad'
888
- if isinstance(doc, dict) and 'pfad' in doc:
889
- # URL-encode the path to handle special characters
890
- encoded_path = quote(doc['pfad'])
891
- file_url = f"{base_url}/{encoded_path}?token={hf_token}"
892
- return f'<b><a href="{file_url}" target="_blank" style="color: #BB70FC; font-weight: bold;">{doc["titel"]}</a></b>'
 
893
  else:
894
- # URL-encode the document name to handle special characters
895
- encoded_doc = quote(doc)
896
- file_url = f"{base_url}/{encoded_doc}?token={hf_token}"
897
- return f'<b><a href="{file_url}" target="_blank" style="color: #BB70FC; font-weight: bold;">{doc}</a></b>'
898
- """
 
 
 
 
 
 
 
 
 
 
899
 
900
 
901
 
 
793
  title = filename if filename else "Keine Überschrift"
794
 
795
  # Determine the document type and adjust the path accordingly
796
+ download_link = download_link(doc)
797
+ """
798
  doc_path = doc.metadata.get("path", "")
799
  if doc_path.endswith('.pdf'):
800
+ download_link = download_link(doc) #f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/chroma/kkg/pdf/{title}"
801
  elif doc_path.endswith('.docx'):
802
  download_link = f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/chroma/kkg/word/{title}"
803
  else:
804
  download_link = doc_path
805
+ """
806
 
807
  info = {
808
  'content': doc.page_content,
 
881
  file_url = f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/{doc}?token=hf_token"
882
  return f'<b><a href="{file_url}" target="_blank" style="color: #BB70FC; font-weight: bold;">{doc}</a></b>'
883
 
884
+ """
885
 
886
  def download_link(doc):
887
  # Basis-URL für das Hugging Face Repository
888
  base_url = f"https://huggingface.co/spaces/{STORAGE_REPO_ID}/resolve/main"
889
 
890
+ if isinstance(doc, dict):
891
+ # Wenn doc ein Dictionary ist (wie in Ihrem ursprünglichen Beispiel)
892
+ if 'pfad' in doc:
893
+ doc_path = doc['pfad']
894
+ title = doc.get('titel', doc_path)
895
+ else:
896
+ return f'<b>{doc.get("titel", "Unbekannter Titel")}</b>'
897
  else:
898
+ # Wenn doc ein String ist oder ein anderes Objekt mit einem 'metadata' Attribut
899
+ doc_path = getattr(doc, 'metadata', {}).get('path', doc if isinstance(doc, str) else '')
900
+ title = os.path.basename(doc_path)
901
+
902
+ # Bestimmen des Dokumenttyps und Anpassen des Pfads
903
+ if doc_path.lower().endswith('.pdf'):
904
+ file_url = f"{base_url}/chroma/kkg/pdf/{quote(title)}?token={hf_token}"
905
+ elif doc_path.lower().endswith('.docx'):
906
+ file_url = f"{base_url}/chroma/kkg/word/{quote(title)}?token={hf_token}"
907
+ else:
908
+ # Fallback für andere Dateitypen
909
+ file_url = f"{base_url}/{quote(doc_path)}?token={hf_token}"
910
+
911
+ return file_url #f'<b><a href="{file_url}" target="_blank" style="color: #BB70FC; font-weight: bold;">{title}</a></b>'
912
+
913
 
914
 
915