ryan0303 committed
Commit: d33b042
1 Parent(s): f042dba

Update app.py

Files changed (1): app.py (+30 -30)
app.py CHANGED
@@ -26,12 +26,12 @@ import re
 
 
 # default_persist_directory = './chroma_HF/'
-list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", \
+list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1"#, "mistralai/Mistral-7B-Instruct-v0.1", \
     #"google/gemma-7b-it","google/gemma-2b-it", \
     #"HuggingFaceH4/zephyr-7b-beta", \
     #"meta-llama/Llama-2-7b-chat-hf", "microsoft/phi-2", \
     #"TinyLlama/TinyLlama-1.1B-Chat-v1.0", "mosaicml/mpt-7b-instruct", "tiiuae/falcon-7b-instruct", \
-    "google/flan-t5-xxl"
+    #"google/flan-t5-xxl"
 ]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 
@@ -103,33 +103,33 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     # Use of trust_remote_code as model_kwargs
     # Warning: langchain issue
     # URL: https://github.com/langchain-ai/langchain/issues/6080
-    if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
-            temperature = temperature,
-            max_new_tokens = max_tokens,
-            top_k = top_k,
-            load_in_8bit = True,
-        )
-    elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
-            temperature = temperature,
-            max_new_tokens = 250,
-            top_k = top_k,
-        )
-    elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
-        raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-            temperature = temperature,
-            max_new_tokens = max_tokens,
-            top_k = top_k,
-        )
-    else:
+    #if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
+    #    llm = HuggingFaceEndpoint(
+    #        repo_id=llm_model,
+    #        # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
+    #        temperature = temperature,
+    #        max_new_tokens = max_tokens,
+    #        top_k = top_k,
+    #        load_in_8bit = True,
+    #    )
+    #elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
+    #    llm = HuggingFaceEndpoint(
+    #        repo_id=llm_model,
+    #        # model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
+    #        temperature = temperature,
+    #        max_new_tokens = 250,
+    #        top_k = top_k,
+    #    )
+    #elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
+    #    raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
+    #    llm = HuggingFaceEndpoint(
+    #        repo_id=llm_model,
+    #        # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
+    #        temperature = temperature,
+    #        max_new_tokens = max_tokens,
+    #        top_k = top_k,
+    #    )
+    #else:
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
             # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
@@ -253,7 +253,7 @@ def conversation(qa_chain, message, history):
     #print("formatted_chat_history",formatted_chat_history)
 
     # Generate response using QA chain
-    response = qa_chain({"question": message, "chat_history": formatted_chat_history})
+    response = qa_chain({"question": message, "chat_history": formatted_chat_history, "prompt": prompt_template})
     response_answer = response["answer"]
     if response_answer.find("Helpful Answer:") != -1:
         response_answer = response_answer.split("Helpful Answer:")[-1]
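
The last hunk passes an extra "prompt" key into the qa_chain call; whether the chain accepts that input depends on how qa_chain is constructed, which this diff does not show. For reference, a minimal sketch (not the app's actual code) of the more common LangChain pattern, where a custom answer prompt is bound when the ConversationalRetrievalChain is built via combine_docs_chain_kwargs. The template text is an illustrative assumption; llm, vector_db, message, and formatted_chat_history are the names used elsewhere in app.py.

# Sketch only: bind the prompt at chain construction time instead of per call.
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate

# Illustrative template; the real wording is an assumption, not taken from app.py.
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        "Use the following context to answer the question.\n"
        "{context}\n"
        "Question: {question}\n"
        "Helpful Answer:"
    ),
)

qa_chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vector_db.as_retriever(),
    return_source_documents=True,
    combine_docs_chain_kwargs={"prompt": prompt_template},  # prompt fixed here
)

# The call then only needs the chain's standard inputs:
response = qa_chain({"question": message, "chat_history": formatted_chat_history})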