Update app.py
app.py (CHANGED)
@@ -22,7 +22,7 @@ import tqdm
 import accelerate
 import re
 
-
+api_key = os.getenv('API_KEY')
 
 
 # default_persist_directory = './chroma_HF/'
@@ -111,7 +111,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             max_new_tokens = max_tokens,
             top_k = top_k,
             load_in_8bit = True,
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = api_key,
         )
     elif llm_model in ["HuggingFaceH4/zephyr-7b-gemma-v0.1","mosaicml/mpt-7b-instruct"]:
         raise gr.Error("LLM model is too large to be loaded automatically on free inference endpoint")
@@ -120,7 +120,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = api_key,
         )
     elif llm_model == "microsoft/phi-2":
         # raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
@@ -132,7 +132,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             top_k = top_k,
             trust_remote_code = True,
             torch_dtype = "auto",
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = api_key,
         )
     elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
         llm = HuggingFaceEndpoint(
@@ -141,7 +141,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             temperature = temperature,
             max_new_tokens = 250,
             top_k = top_k,
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = api_key,
         )
     elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
         raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
@@ -151,7 +151,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = api_key,
         )
     else:
         llm = HuggingFaceEndpoint(
@@ -161,7 +161,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = api_key,
         )
 
     progress(0.75, desc="Defining buffer memory...")
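For context, the change reads a Hugging Face access token from an environment variable (on Spaces, a repository secret exposed to the runtime, here named API_KEY) and passes the resulting variable to each HuggingFaceEndpoint constructor. Note that the value must be the variable api_key, not the string literal 'api_key': quoting it would send the six characters of the name as the bearer token and every request would be rejected. A minimal standalone sketch of the same pattern, assuming langchain_community is installed and the API_KEY secret is set; the repo_id and sampling parameters are illustrative, not taken from this commit:

import os
from langchain_community.llms import HuggingFaceEndpoint

# Read the secret once at startup; os.getenv returns None when the
# variable is missing, so fail fast with a clear error instead of
# sending an empty token to the Inference API.
api_key = os.getenv('API_KEY')
if not api_key:
    raise RuntimeError("API_KEY environment variable is not set")

# Pass the variable itself, not the string literal 'api_key'.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",  # illustrative model id
    temperature=0.7,
    max_new_tokens=1024,
    top_k=3,
    huggingfacehub_api_token=api_key,
)

print(llm.invoke("What is retrieval-augmented generation?"))

If the secret were instead named HUGGINGFACEHUB_API_TOKEN, LangChain would typically pick it up from the environment and the explicit huggingfacehub_api_token argument could be dropped.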