alexkueck committed on
Commit
ed0b948
1 Parent(s): ca1af8f

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +51 -7
utils.py CHANGED
@@ -15,9 +15,10 @@ import gc
15
  from pygments.lexers import guess_lexer, ClassNotFound
16
  import time
17
  import json
18
- import operator
19
- from typing import Annotated, Sequence, TypedDict
20
- import pprint
 
21
 
22
  from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer, AutoModelForCausalLM, GPTNeoForCausalLM, GPT2Tokenizer
23
  from sentence_transformers import SentenceTransformer, util
@@ -28,9 +29,9 @@ from pypinyin import lazy_pinyin
28
  import tiktoken
29
  import mdtex2html
30
  from markdown import markdown
31
- from pygments import highlight
32
- from pygments.lexers import guess_lexer,get_lexer_by_name
33
- from pygments.formatters import HtmlFormatter
34
 
35
  from langchain.chains import LLMChain, RetrievalQA
36
  from langchain.prompts import PromptTemplate
@@ -389,7 +390,7 @@ def llm_chain2(prompt, context):
389
  inputs = tokenizer_rag(full_prompt, return_tensors="pt", max_length=1024, truncation=True)
390
 
391
  #Generiere die Antwort
392
- outputs = modell_rag.generate(inputs['input_ids'], max_length=1024, num_beams=2, early_stopping=True)
393
  answer = tokenizer_rag.decode(outputs[0], skip_special_tokens=True)
394
 
395
  return answer
@@ -538,6 +539,49 @@ def transfer_input(inputs):
538
  )
539
 
540
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541
  #################################################
542
  #Klasse mit zuständen - z.B. für interrupt wenn Stop gedrückt...
543
  #################################################
 
15
  from pygments.lexers import guess_lexer, ClassNotFound
16
  import time
17
  import json
18
+ import base64
19
+ from io import BytesIO
20
+ import urllib.parse
21
+ import tempfile
22
 
23
  from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer, AutoModelForCausalLM, GPTNeoForCausalLM, GPT2Tokenizer
24
  from sentence_transformers import SentenceTransformer, util
 
29
  import tiktoken
30
  import mdtex2html
31
  from markdown import markdown
32
+ #from pygments import highlight
33
+ #from pygments.lexers import guess_lexer,get_lexer_by_name
34
+ #from pygments.formatters import HtmlFormatter
35
 
36
  from langchain.chains import LLMChain, RetrievalQA
37
  from langchain.prompts import PromptTemplate
 
390
  inputs = tokenizer_rag(full_prompt, return_tensors="pt", max_length=1024, truncation=True)
391
 
392
  #Generiere die Antwort
393
+ outputs = modell_rag.generate(inputs['input_ids'], max_new_tokens=1024, num_beams=2, early_stopping=True)
394
  answer = tokenizer_rag.decode(outputs[0], skip_special_tokens=True)
395
 
396
  return answer
 
539
  )
540
 
541
 
542
+ ########################################################
543
+ ######## Hilfsfunktionen Datei-Upload ##################
544
+ # Hochladen von Dateien
545
def upload_pdf(file):
    """Upload a file into the ``kkg_dokumente/`` folder via ``api.upload_file``.

    Args:
        file: Uploaded-file object whose ``name`` attribute holds the local
            path, or ``None`` when nothing was uploaded.

    Returns:
        ``(None, "Keine Datei hochgeladen.")`` when ``file`` is ``None``;
        otherwise a single status string naming the uploaded file.
    """
    # NOTE(review): the two paths return different shapes (2-tuple vs. str);
    # confirm which one the caller's outputs expect before unifying them.
    if file is None:
        return None, "Keine Datei hochgeladen."

    # Only the base name of the local path is used as the name in the repo.
    pdf_name = os.path.basename(file.name)
    api.upload_file(
        path_or_fileobj=file.name,
        path_in_repo=f"kkg_dokumente/{pdf_name}",
        repo_id=REPO_ID,
        repo_type=REPO_TYPE,
        token=HF_WRITE,
    )
    return f"PDF '{pdf_name}' erfolgreich hochgeladen."
562
+
563
def display_files():
    """Render the entries of ``DOCS_DIR`` as an HTML table.

    Each row shows the result of ``download_link(name)`` for the entry and
    its size in KB, with alternating row background colours.

    Returns:
        str: The ``<table>`` markup. When ``DOCS_DIR`` is not an existing
        directory, the table contains only the header row.
    """
    # A missing directory gives a header-only table instead of raising,
    # matching the guard that list_pdfs applies before listing.
    names = os.listdir(DOCS_DIR) if os.path.isdir(DOCS_DIR) else []

    # Collect fragments and join once rather than growing a string with +=.
    parts = [
        "<table style='width:100%; border-collapse: collapse;'>",
        "<tr style='background-color: #930BBA; color: white; font-weight: bold; font-size: larger;'><th>Dateiname</th><th>Größe (KB)</th></tr>",
    ]
    for i, name in enumerate(names):
        size_kb = os.path.getsize(os.path.join(DOCS_DIR, name)) / 1024
        row_color = "#4f4f4f" if i % 2 == 0 else "#3a3a3a"  # alternate row colours
        parts.append(f"<tr style='background-color: {row_color}; border-bottom: 1px solid #ddd;'>")
        parts.append(f"<td><b>{download_link(name)}</b></td>")
        parts.append(f"<td>{size_kb:.2f}</td></tr>")
    parts.append("</table>")
    return "".join(parts)
576
+
577
+
578
+ # gefundene relevante Dokumente auflisten (links)
579
def list_pdfs():
    """Return the names of the ``.pdf`` entries in ``DOCS_DIR``.

    Returns:
        list[str]: Entry names ending in ``.pdf``; an empty list when
        ``DOCS_DIR`` is not an existing directory.
    """
    # The original checked DOCS_DIR but listed SAVE_DIR, so the guard did not
    # protect the directory actually read. Both now use DOCS_DIR, the same
    # directory display_files reads.
    # NOTE(review): confirm DOCS_DIR (not SAVE_DIR) is the intended directory.
    if not os.path.isdir(DOCS_DIR):
        return []
    return [name for name in os.listdir(DOCS_DIR) if name.endswith('.pdf')]
583
+
584
+
585
  #################################################
586
  #Klasse mit zuständen - z.B. für interrupt wenn Stop gedrückt...
587
  #################################################