Update app.py

app.py CHANGED
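This commit replaces the body of generate(): the function now takes a validate flag, refuses to run until the user has validated and supplied a prompt, and streams its answer character by character into the chat history (yielding history plus a status string) instead of yielding the raw text. Both versions call the serverless Hugging Face Inference API via requests. For reference, the following is a minimal, self-contained sketch of that request/response shape; the HF_READ_TOKEN environment variable, the timeout, and the query helper are illustrative assumptions, since HEADERS is defined elsewhere in app.py and is not part of this hunk:

import os
import requests

# Same endpoint as the "HF1" branch in the hunk below.
API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
# Assumed token source; app.py builds its own HEADERS outside this hunk.
HEADERS = {"Authorization": f"Bearer {os.environ['HF_READ_TOKEN']}"}

def query(prompt: str, max_new_tokens: int = 1024) -> str:
    # Mirror the payload shape used in generate(): prompt under "inputs",
    # generation options alongside it.
    data = {"inputs": prompt, "options": {"max_new_tokens": max_new_tokens}}
    response = requests.post(API_URL, headers=HEADERS, json=data, timeout=120)
    response.raise_for_status()
    # The text-generation endpoint returns a list with one dict per input.
    return response.json()[0]["generated_text"]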
@@ -303,102 +303,107 @@ def transfer_input(inputs):
 ##############################################
 # generate function
 ##############################################
-def generate(text, history, rag_option, model_option, k=3, top_p=0.6, temperature=0.5, max_new_tokens=4048, max_context_length_tokens=2048, repetition_penalty=1.3,):
-    #
-    if (
-
-
-
-
-
-    if (model_option == "HF1"):
-        #Request to InferenceEndpoint1 ----------------------------
-        API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
-        print("HF1")
-    else:
-        API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
-        print("HF2")
-
-    if (rag_option == "An"):
-        #only needs to be run once...
-        if not splittet:
-            splits = document_loading_splitting()
-            document_storage_chroma(splits)
-        db = document_retrieval_chroma()
-        #with RAG:
-        neu_text_mit_chunks = rag_chain(text, db, k)
-        #for a chat LLM:
-        #prompt = generate_prompt_with_history_openai(neu_text_mit_chunks, history)
-        #as a plain prompt:
-        prompt = generate_prompt_with_history(neu_text_mit_chunks, history)
-    else:
-        #for a chat LLM:
-        #prompt = generate_prompt_with_history_openai(text, history)
-        #as a plain prompt:
-        prompt = generate_prompt_with_history(text, history)
-    print("prompt:....................................")
-    print (prompt)
-    #Request to the model (with RAG: with chunks from the vector store, without: just prompt and history)
-    #payload = tokenizer.apply_chat_template([{"role":"user","content":prompt}],tokenize=False)
-    #For LLAMA:
-    #payload = tokenizer.apply_chat_template(prompt,tokenize=False)
-    #result = client.text_generation(payload, do_sample=True,return_full_text=False, max_new_tokens=2048,top_p=0.9,temperature=0.6,)
-    #generic inference:
-    data = {
-        "inputs": prompt,
-        "options": {"max_new_tokens": max_new_tokens},
-    }
-    response= requests.post(API_URL, headers=HEADERS, json=data)
-    result = response.json()
-    print("result:------------------")
-    chatbot_response = result[0]['generated_text']
-    print("total token count of the answer:------------------")
-    print (len(chatbot_response.split()))
-    except Exception as e:
-        raise gr.Error(e)
-
-    chatbot_message = chatbot_response[len(prompt):].strip()
-    print("history/chatbot_response:--------------------------------")
-    print(history)
-    print(chatbot_message)
-
-    """
-    #Stream the answer...
-    for i in range(len(chatbot_message)):
-        time.sleep(0.03)
-        yield chatbot_message[: i+1], "Generating"
-    if shared_state.interrupted:
-        shared_state.recover()
-        try:
-            yield chatbot_message[: i+1], "Stop: Success"
-            return
-        except:
-            pass
-    """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+def generate(text, history, rag_option, model_option, k=3, top_p=0.6, temperature=0.5, max_new_tokens=4048, max_context_length_tokens=2048, repetition_penalty=1.3, validate=False):
+    #the application may only start once the user has validated
+    if (validate and not text == "" and not text == None):
+        #with RAG
+        if (rag_option is None):
+            raise gr.Error("Retrieval Augmented Generation is required.")
+        if (text == ""):
+            raise gr.Error("A prompt is required.")
+
+        try:
+            if (model_option == "HF1"):
+                #Request to InferenceEndpoint1 ----------------------------
+                API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
+                print("HF1")
+            else:
+                API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
+                print("HF2")
+
+            if (rag_option == "An"):
+                #only needs to be run once...
+                if not splittet:
+                    splits = document_loading_splitting()
+                    document_storage_chroma(splits)
+                db = document_retrieval_chroma()
+                #with RAG:
+                neu_text_mit_chunks = rag_chain(text, db, k)
+                #for a chat LLM:
+                #prompt = generate_prompt_with_history_openai(neu_text_mit_chunks, history)
+                #as a plain prompt:
+                prompt = generate_prompt_with_history(neu_text_mit_chunks, history)
+            else:
+                #for a chat LLM:
+                #prompt = generate_prompt_with_history_openai(text, history)
+                #as a plain prompt:
+                prompt = generate_prompt_with_history(text, history)
+            print("prompt:....................................")
+            print (prompt)
+            #Request to the model (with RAG: with chunks from the vector store, without: just prompt and history)
+            #payload = tokenizer.apply_chat_template([{"role":"user","content":prompt}],tokenize=False)
+            #For LLAMA:
+            #payload = tokenizer.apply_chat_template(prompt,tokenize=False)
+            #result = client.text_generation(payload, do_sample=True,return_full_text=False, max_new_tokens=2048,top_p=0.9,temperature=0.6,)
+            #generic inference:
+            data = {
+                "inputs": prompt,
+                "options": {"max_new_tokens": max_new_tokens},
+            }
+            response= requests.post(API_URL, headers=HEADERS, json=data)
+            result = response.json()
+            print("result:------------------")
+            chatbot_response = result[0]['generated_text']
+            print("total token count of the answer:------------------")
+            print (len(chatbot_response.split()))
+        except Exception as e:
+            raise gr.Error(e)
+
+        chatbot_message = chatbot_response[len(prompt):].strip()
+        print("history/chatbot_response:--------------------------------")
+        print(history)
+        print(chatbot_message)
+
+        """
+        #Stream the answer...
+        for i in range(len(chatbot_message)):
+            time.sleep(0.03)
+            yield chatbot_message[: i+1], "Generating"
+        if shared_state.interrupted:
+            shared_state.recover()
+            try:
+                yield chatbot_message[: i+1], "Stop: Success"
+                return
+            except:
+                pass
+        """
+
+        #Stream the answer...
+        history[-1][1] = ""
+        for character in chatbot_message:
+            history[-1][1] += character
+            time.sleep(0.03)
+            yield history, "Generating"
+        if shared_state.interrupted:
+            shared_state.recover()
+            try:
+                yield history, "Stop: Success"
+                return
+            except:
+                pass
+
+
+        #for evaluation:
+        # custom eli5 criteria
+        #custom_criterion = {"eli5": "Is the output explained in a way that a 5 year old would understand it?"}
+
+        #eval_result = evaluator.evaluate_strings(prediction=res.strip(), input=text, criteria=custom_criterion, requires_reference=True)
+        #print ("eval_result:............ ")
+        #print(eval_result)
+        #return res.strip()
+    else: #not yet validated, or no prompt
+        return history, "Validate first or enter a prompt!"
+
 
 ########################################
 #Evaluation