alexkueck committed
Commit 292bf25 · verified · 1 Parent(s): d192523

Update app.py

Files changed (1)
  1. app.py +98 -93
app.py CHANGED
@@ -303,102 +303,107 @@ def transfer_input(inputs):
  ##############################################
  # generate function
  ##############################################
- def generate(text, history, rag_option, model_option, k=3, top_p=0.6, temperature=0.5, max_new_tokens=4048, max_context_length_tokens=2048, repetition_penalty=1.3,):
-     # with RAG
-     if (rag_option is None):
-         raise gr.Error("Retrieval Augmented Generation ist erforderlich.")
-     if (text == ""):
-         raise gr.Error("Prompt ist erforderlich.")
-
-     try:
-         if (model_option == "HF1"):
-             # request to InferenceEndpoint1 ----------------------------
-             API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
-             print("HF1")
-         else:
-             API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
-             print("HF2")
-
-         if (rag_option == "An"):
-             # only needs to run once...
-             if not splittet:
-                 splits = document_loading_splitting()
-                 document_storage_chroma(splits)
-             db = document_retrieval_chroma()
-             # with RAG:
-             neu_text_mit_chunks = rag_chain(text, db, k)
-             # for a chat LLM:
-             #prompt = generate_prompt_with_history_openai(neu_text_mit_chunks, history)
-             # as a plain prompt:
-             prompt = generate_prompt_with_history(neu_text_mit_chunks, history)
-         else:
-             # for a chat LLM:
-             #prompt = generate_prompt_with_history_openai(text, history)
-             # as a plain prompt:
-             prompt = generate_prompt_with_history(text, history)
-         print("prompt:....................................")
-         print(prompt)
-         # request to the model (with RAG: with chunks from the vector store; without: just prompt and history)
-         #payload = tokenizer.apply_chat_template([{"role":"user","content":prompt}],tokenize=False)
-         # for LLAMA:
-         #payload = tokenizer.apply_chat_template(prompt,tokenize=False)
-         #result = client.text_generation(payload, do_sample=True,return_full_text=False, max_new_tokens=2048,top_p=0.9,temperature=0.6,)
-         # generic inference:
-         data = {
-             "inputs": prompt,
-             "options": {"max_new_tokens": max_new_tokens},
-         }
-         response = requests.post(API_URL, headers=HEADERS, json=data)
-         result = response.json()
-         print("result:------------------")
-         chatbot_response = result[0]['generated_text']
-         print("anzahl tokens gesamt antwort:------------------")
-         print(len(chatbot_response.split()))
-     except Exception as e:
-         raise gr.Error(e)
-
-     chatbot_message = chatbot_response[len(prompt):].strip()
-     print("history/chatbot_response:--------------------------------")
-     print(history)
-     print(chatbot_message)
-
-     """
-     # output the answer as a stream...
-     for i in range(len(chatbot_message)):
-         time.sleep(0.03)
-         yield chatbot_message[: i+1], "Generating"
-         if shared_state.interrupted:
-             shared_state.recover()
-             try:
-                 yield chatbot_message[: i+1], "Stop: Success"
-                 return
-             except:
-                 pass
-     """

-     # output the answer as a stream...
-     history[-1][1] = ""
-     for character in chatbot_message:
-         history[-1][1] += character
-         time.sleep(0.03)
-         yield history, "Generating"
-         if shared_state.interrupted:
-             shared_state.recover()
-             try:
-                 yield history, "Stop: Success"
-                 return
-             except:
-                 pass
-
-     # for evaluation:
-     # custom eli5 criteria
-     #custom_criterion = {"eli5": "Is the output explained in a way that a 5 year old would understand it?"}
-
-     #eval_result = evaluator.evaluate_strings(prediction=res.strip(), input=text, criteria=custom_criterion, requires_reference=True)
-     #print("eval_result:............ ")
-     #print(eval_result)
-     #return res.strip()
  ########################################
  #Evaluation
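For reference, both the removed and the added version of generate() talk to the hosted Hugging Face Inference API directly. Below is a minimal standalone sketch of that request pattern, with the assumption that HEADERS (built elsewhere in app.py) carries a bearer token taken from an HF_TOKEN environment variable:

import os
import requests

# Same endpoint the "HF1" branch uses
API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
# Assumption: app.py builds HEADERS elsewhere; here it is recreated from an HF_TOKEN env var
HEADERS = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}

def query(prompt, max_new_tokens=256):
    # Same payload shape generate() sends: the raw prompt plus generation options
    data = {"inputs": prompt, "options": {"max_new_tokens": max_new_tokens}}
    response = requests.post(API_URL, headers=HEADERS, json=data)
    # The endpoint answers with a list holding one dict that contains the full generated text
    return response.json()[0]["generated_text"]

print(query("Was ist Retrieval Augmented Generation?"))

Note that the hosted Inference API normally takes generation settings such as max_new_tokens under a "parameters" key, while "options" is meant for flags like wait_for_model; the sketch mirrors the payload exactly as app.py builds it.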
 
  ##############################################
  # generate function
  ##############################################
+ def generate(text, history, rag_option, model_option, k=3, top_p=0.6, temperature=0.5, max_new_tokens=4048, max_context_length_tokens=2048, repetition_penalty=1.3, validate=False):
+     # the application only starts once the user has validated
+     if (validate and not text == "" and not text == None):
+         # with RAG
+         if (rag_option is None):
+             raise gr.Error("Retrieval Augmented Generation ist erforderlich.")
+         if (text == ""):
+             raise gr.Error("Prompt ist erforderlich.")
+
+         try:
+             if (model_option == "HF1"):
+                 # request to InferenceEndpoint1 ----------------------------
+                 API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
+                 print("HF1")
+             else:
+                 API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
+                 print("HF2")
+
+             if (rag_option == "An"):
+                 # only needs to run once...
+                 if not splittet:
+                     splits = document_loading_splitting()
+                     document_storage_chroma(splits)
+                 db = document_retrieval_chroma()
+                 # with RAG:
+                 neu_text_mit_chunks = rag_chain(text, db, k)
+                 # for a chat LLM:
+                 #prompt = generate_prompt_with_history_openai(neu_text_mit_chunks, history)
+                 # as a plain prompt:
+                 prompt = generate_prompt_with_history(neu_text_mit_chunks, history)
+             else:
+                 # for a chat LLM:
+                 #prompt = generate_prompt_with_history_openai(text, history)
+                 # as a plain prompt:
+                 prompt = generate_prompt_with_history(text, history)
+             print("prompt:....................................")
+             print(prompt)
+             # request to the model (with RAG: with chunks from the vector store; without: just prompt and history)
+             #payload = tokenizer.apply_chat_template([{"role":"user","content":prompt}],tokenize=False)
+             # for LLAMA:
+             #payload = tokenizer.apply_chat_template(prompt,tokenize=False)
+             #result = client.text_generation(payload, do_sample=True,return_full_text=False, max_new_tokens=2048,top_p=0.9,temperature=0.6,)
+             # generic inference:
+             data = {
+                 "inputs": prompt,
+                 "options": {"max_new_tokens": max_new_tokens},
+             }
+             response = requests.post(API_URL, headers=HEADERS, json=data)
+             result = response.json()
+             print("result:------------------")
+             chatbot_response = result[0]['generated_text']
+             print("anzahl tokens gesamt antwort:------------------")
+             print(len(chatbot_response.split()))
+         except Exception as e:
+             raise gr.Error(e)
+
+         chatbot_message = chatbot_response[len(prompt):].strip()
+         print("history/chatbot_response:--------------------------------")
+         print(history)
+         print(chatbot_message)
+
+         """
+         # output the answer as a stream...
+         for i in range(len(chatbot_message)):
+             time.sleep(0.03)
+             yield chatbot_message[: i+1], "Generating"
+             if shared_state.interrupted:
+                 shared_state.recover()
+                 try:
+                     yield chatbot_message[: i+1], "Stop: Success"
+                     return
+                 except:
+                     pass
+         """
+
+         # output the answer as a stream...
+         history[-1][1] = ""
+         for character in chatbot_message:
+             history[-1][1] += character
+             time.sleep(0.03)
+             yield history, "Generating"
+             if shared_state.interrupted:
+                 shared_state.recover()
+                 try:
+                     yield history, "Stop: Success"
+                     return
+                 except:
+                     pass
+
+         # for evaluation:
+         # custom eli5 criteria
+         #custom_criterion = {"eli5": "Is the output explained in a way that a 5 year old would understand it?"}
+
+         #eval_result = evaluator.evaluate_strings(prediction=res.strip(), input=text, criteria=custom_criterion, requires_reference=True)
+         #print("eval_result:............ ")
+         #print(eval_result)
+         #return res.strip()
+     else:  # not yet validated, or no prompt
+         return history, "Erst validieren oder einen Prompt eingeben!"
+
  ########################################
  #Evaluation
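The hunk changes only generate() itself; how the new validate flag is supplied is not part of this diff. One plausible wiring, stated purely as an assumption (validate_pin, the PIN value, and all component names below are illustrative and do not come from app.py), is a gr.State that a validation step flips to True and that a thin wrapper forwards as the last argument:

import gradio as gr

def validate_pin(pin):
    # Hypothetical check; the real validation logic is not shown in this commit
    return pin == "1234"

def generate_gated(text, history, rag_option, model_option, validate):
    # Assumption: append the user turn first (the app presumably does this in transfer_input)
    history = history + [[text, None]]
    # Forward to generate() with its default sampling parameters, validate passed last
    yield from generate(text, history, rag_option, model_option, validate=validate)

with gr.Blocks() as demo:
    validated = gr.State(False)   # flips to True after successful validation
    pin = gr.Textbox(label="PIN")
    login = gr.Button("Validieren")
    login.click(validate_pin, inputs=pin, outputs=validated)

    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Prompt")
    status = gr.Textbox(label="Status")
    rag = gr.Radio(["An", "Aus"], value="Aus", label="RAG")
    model = gr.Radio(["HF1", "HF2"], value="HF1", label="Modell")
    send = gr.Button("Senden")
    # generate() yields (history, status) pairs, so Gradio streams them into the two outputs
    send.click(generate_gated, inputs=[msg, chatbot, rag, model, validated],
               outputs=[chatbot, status])

demo.launch()

Because generate() contains yield statements it is a generator, so the plain return in the new else branch never reaches the UI as an output value; in a wiring like this, the "Erst validieren oder einen Prompt eingeben!" message would only appear if it were yielded instead.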