ryan0303 committed
Commit: d33b042
1 Parent(s): f042dba

Update app.py

Files changed (1): app.py (+30 -30)
app.py CHANGED
@@ -26,12 +26,12 @@ import re
 
 
 # default_persist_directory = './chroma_HF/'
-list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", \
+list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1"#, "mistralai/Mistral-7B-Instruct-v0.1", \
     #"google/gemma-7b-it","google/gemma-2b-it", \
     #"HuggingFaceH4/zephyr-7b-beta", \
     #"meta-llama/Llama-2-7b-chat-hf", "microsoft/phi-2", \
     #"TinyLlama/TinyLlama-1.1B-Chat-v1.0", "mosaicml/mpt-7b-instruct", "tiiuae/falcon-7b-instruct", \
-    "google/flan-t5-xxl"
+    #"google/flan-t5-xxl"
 ]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 
@@ -103,33 +103,33 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     # Use of trust_remote_code as model_kwargs
     # Warning: langchain issue
     # URL: https://github.com/langchain-ai/langchain/issues/6080
-    if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
-            temperature = temperature,
-            max_new_tokens = max_tokens,
-            top_k = top_k,
-            load_in_8bit = True,
-        )
-    elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
-            temperature = temperature,
-            max_new_tokens = 250,
-            top_k = top_k,
-        )
-    elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
-        raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-            temperature = temperature,
-            max_new_tokens = max_tokens,
-            top_k = top_k,
-        )
-    else:
+    #if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
+    #    llm = HuggingFaceEndpoint(
+    #        repo_id=llm_model,
+    #        # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
+    #        temperature = temperature,
+    #        max_new_tokens = max_tokens,
+    #        top_k = top_k,
+    #        load_in_8bit = True,
+    #    )
+    #elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
+    #    llm = HuggingFaceEndpoint(
+    #        repo_id=llm_model,
+    #        # model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
+    #        temperature = temperature,
+    #        max_new_tokens = 250,
+    #        top_k = top_k,
+    #    )
+    #elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
+    #    raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
+    #    llm = HuggingFaceEndpoint(
+    #        repo_id=llm_model,
+    #        # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
+    #        temperature = temperature,
+    #        max_new_tokens = max_tokens,
+    #        top_k = top_k,
+    #    )
+    #else:
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
             # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
@@ -253,7 +253,7 @@ def conversation(qa_chain, message, history):
     #print("formatted_chat_history",formatted_chat_history)
 
     # Generate response using QA chain
-    response = qa_chain({"question": message, "chat_history": formatted_chat_history})
+    response = qa_chain({"question": message, "chat_history": formatted_chat_history, "prompt": prompt_template})
     response_answer = response["answer"]
     if response_answer.find("Helpful Answer:") != -1:
         response_answer = response_answer.split("Helpful Answer:")[-1]
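
The last hunk passes an extra "prompt" key into the qa_chain call; whether the chain accepts that input depends on how qa_chain is constructed, which this diff does not show. For reference, a minimal sketch (not the app's actual code) of the more common LangChain pattern, where a custom answer prompt is bound when the ConversationalRetrievalChain is built via combine_docs_chain_kwargs. The template text is an illustrative assumption; llm, vector_db, message, and formatted_chat_history are the names used elsewhere in app.py.

# Sketch only: bind the prompt at chain construction time instead of per call.
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate

# Illustrative template; the real wording is an assumption, not taken from app.py.
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        "Use the following context to answer the question.\n"
        "{context}\n"
        "Question: {question}\n"
        "Helpful Answer:"
    ),
)

qa_chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vector_db.as_retriever(),
    return_source_documents=True,
    combine_docs_chain_kwargs={"prompt": prompt_template},  # prompt fixed here
)

# The call then only needs the chain's standard inputs:
response = qa_chain({"question": message, "chat_history": formatted_chat_history})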