Update app.py

app.py CHANGED
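This commit replaces the body of generate(): the function now takes a validate flag, refuses to run until the user has validated and supplied a prompt, and streams its answer character by character into the chat history (yielding history plus a status string) instead of yielding the raw text. Both versions call the serverless Hugging Face Inference API via requests. For reference, the following is a minimal, self-contained sketch of that request/response shape; the HF_READ_TOKEN environment variable, the timeout, and the query helper are illustrative assumptions, since HEADERS is defined elsewhere in app.py and is not part of this hunk:

import os
import requests

# Same endpoint as the "HF1" branch in the hunk below.
API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
# Assumed token source; app.py builds its own HEADERS outside this hunk.
HEADERS = {"Authorization": f"Bearer {os.environ['HF_READ_TOKEN']}"}

def query(prompt: str, max_new_tokens: int = 1024) -> str:
    # Mirror the payload shape used in generate(): prompt under "inputs",
    # generation options alongside it.
    data = {"inputs": prompt, "options": {"max_new_tokens": max_new_tokens}}
    response = requests.post(API_URL, headers=HEADERS, json=data, timeout=120)
    response.raise_for_status()
    # The text-generation endpoint returns a list with one dict per input.
    return response.json()[0]["generated_text"]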
@@ -303,102 +303,107 @@ def transfer_input(inputs):
 ##############################################
 # generate function
 ##############################################
-def generate(text, history, rag_option, model_option, k=3, top_p=0.6, temperature=0.5, max_new_tokens=4048, max_context_length_tokens=2048, repetition_penalty=1.3,):
-    #
-    if (
-
-
-
-
-
-    if (model_option == "HF1"):
-        #Request to InferenceEndpoint1 ----------------------------
-        API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
-        print("HF1")
-    else:
-        API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
-        print("HF2")
-
-    if (rag_option == "An"):
-        #only needs to be run once...
-        if not splittet:
-            splits = document_loading_splitting()
-            document_storage_chroma(splits)
-        db = document_retrieval_chroma()
-        #with RAG:
-        neu_text_mit_chunks = rag_chain(text, db, k)
-        #for a chat LLM:
-        #prompt = generate_prompt_with_history_openai(neu_text_mit_chunks, history)
-        #as a plain prompt:
-        prompt = generate_prompt_with_history(neu_text_mit_chunks, history)
-    else:
-        #for a chat LLM:
-        #prompt = generate_prompt_with_history_openai(text, history)
-        #as a plain prompt:
-        prompt = generate_prompt_with_history(text, history)
-    print("prompt:....................................")
-    print (prompt)
-    #Request to the model (with RAG: with chunks from the vector store, without: just prompt and history)
-    #payload = tokenizer.apply_chat_template([{"role":"user","content":prompt}],tokenize=False)
-    #For LLAMA:
-    #payload = tokenizer.apply_chat_template(prompt,tokenize=False)
-    #result = client.text_generation(payload, do_sample=True,return_full_text=False, max_new_tokens=2048,top_p=0.9,temperature=0.6,)
-    #generic inference:
-    data = {
-        "inputs": prompt,
-        "options": {"max_new_tokens": max_new_tokens},
-    }
-    response= requests.post(API_URL, headers=HEADERS, json=data)
-    result = response.json()
-    print("result:------------------")
-    chatbot_response = result[0]['generated_text']
-    print("total token count of the answer:------------------")
-    print (len(chatbot_response.split()))
-    except Exception as e:
-        raise gr.Error(e)
-
-    chatbot_message = chatbot_response[len(prompt):].strip()
-    print("history/chatbot_response:--------------------------------")
-    print(history)
-    print(chatbot_message)
-
-    """
-    #Stream the answer...
-    for i in range(len(chatbot_message)):
-        time.sleep(0.03)
-        yield chatbot_message[: i+1], "Generating"
-    if shared_state.interrupted:
-        shared_state.recover()
-        try:
-            yield chatbot_message[: i+1], "Stop: Success"
-            return
-        except:
-            pass
-    """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+def generate(text, history, rag_option, model_option, k=3, top_p=0.6, temperature=0.5, max_new_tokens=4048, max_context_length_tokens=2048, repetition_penalty=1.3, validate=False):
+    #the application may only start once the user has validated
+    if (validate and not text == "" and not text == None):
+        #with RAG
+        if (rag_option is None):
+            raise gr.Error("Retrieval Augmented Generation is required.")
+        if (text == ""):
+            raise gr.Error("A prompt is required.")
+
+        try:
+            if (model_option == "HF1"):
+                #Request to InferenceEndpoint1 ----------------------------
+                API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
+                print("HF1")
+            else:
+                API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
+                print("HF2")
+
+            if (rag_option == "An"):
+                #only needs to be run once...
+                if not splittet:
+                    splits = document_loading_splitting()
+                    document_storage_chroma(splits)
+                db = document_retrieval_chroma()
+                #with RAG:
+                neu_text_mit_chunks = rag_chain(text, db, k)
+                #for a chat LLM:
+                #prompt = generate_prompt_with_history_openai(neu_text_mit_chunks, history)
+                #as a plain prompt:
+                prompt = generate_prompt_with_history(neu_text_mit_chunks, history)
+            else:
+                #for a chat LLM:
+                #prompt = generate_prompt_with_history_openai(text, history)
+                #as a plain prompt:
+                prompt = generate_prompt_with_history(text, history)
+            print("prompt:....................................")
+            print (prompt)
+            #Request to the model (with RAG: with chunks from the vector store, without: just prompt and history)
+            #payload = tokenizer.apply_chat_template([{"role":"user","content":prompt}],tokenize=False)
+            #For LLAMA:
+            #payload = tokenizer.apply_chat_template(prompt,tokenize=False)
+            #result = client.text_generation(payload, do_sample=True,return_full_text=False, max_new_tokens=2048,top_p=0.9,temperature=0.6,)
+            #generic inference:
+            data = {
+                "inputs": prompt,
+                "options": {"max_new_tokens": max_new_tokens},
+            }
+            response= requests.post(API_URL, headers=HEADERS, json=data)
+            result = response.json()
+            print("result:------------------")
+            chatbot_response = result[0]['generated_text']
+            print("total token count of the answer:------------------")
+            print (len(chatbot_response.split()))
+        except Exception as e:
+            raise gr.Error(e)
+
+        chatbot_message = chatbot_response[len(prompt):].strip()
+        print("history/chatbot_response:--------------------------------")
+        print(history)
+        print(chatbot_message)
+
+        """
+        #Stream the answer...
+        for i in range(len(chatbot_message)):
+            time.sleep(0.03)
+            yield chatbot_message[: i+1], "Generating"
+        if shared_state.interrupted:
+            shared_state.recover()
+            try:
+                yield chatbot_message[: i+1], "Stop: Success"
+                return
+            except:
+                pass
+        """
+
+        #Stream the answer...
+        history[-1][1] = ""
+        for character in chatbot_message:
+            history[-1][1] += character
+            time.sleep(0.03)
+            yield history, "Generating"
+        if shared_state.interrupted:
+            shared_state.recover()
+            try:
+                yield history, "Stop: Success"
+                return
+            except:
+                pass
+
+
+        #for evaluation:
+        # custom eli5 criteria
+        #custom_criterion = {"eli5": "Is the output explained in a way that a 5 year old would understand it?"}
+
+        #eval_result = evaluator.evaluate_strings(prediction=res.strip(), input=text, criteria=custom_criterion, requires_reference=True)
+        #print ("eval_result:............ ")
+        #print(eval_result)
+        #return res.strip()
+    else: #not yet validated, or no prompt
+        return history, "Validate first or enter a prompt!"
+
 
 ########################################
 #Evaluation