Spaces:
Running
Running
Leo Fan
committed on
Commit
•
98b8cda
1
Parent(s):
e388bef
Switched LLM to dedicated server.
Browse files
- app/__pycache__/server.cpython-311.pyc +0 -0
- app/server.py +13 -3
app/__pycache__/server.cpython-311.pyc
CHANGED
Binary files a/app/__pycache__/server.cpython-311.pyc and b/app/__pycache__/server.cpython-311.pyc differ
|
|
app/server.py
CHANGED
@@ -3,6 +3,7 @@ from fastapi.responses import RedirectResponse
|
|
3 |
from langserve import add_routes
|
4 |
from langchain_community.vectorstores import FAISS
|
5 |
from langchain_community.llms import HuggingFaceHub
|
|
|
6 |
import os
|
7 |
from langchain.prompts import ChatPromptTemplate
|
8 |
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
|
@@ -13,11 +14,20 @@ app = FastAPI()
|
|
13 |
|
14 |
# os.environ['TRANSFORMERS_CACHE'] = '/blabla/cache/'
|
15 |
|
16 |
-
hf_llm = HuggingFaceHub(
|
17 |
-
repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
huggingfacehub_api_token=os.environ["HF_TOKEN"],
|
19 |
task="text-generation",
|
20 |
-
model_kwargs={"temperature":0.01, "max_new_tokens" : 250}
|
|
|
|
|
21 |
)
|
22 |
|
23 |
embedding_model_id = 'WhereIsAI/UAE-Large-V1'
|
|
|
3 |
from langserve import add_routes
|
4 |
from langchain_community.vectorstores import FAISS
|
5 |
from langchain_community.llms import HuggingFaceHub
|
6 |
+
from langchain_community.llms import HuggingFaceEndpoint
|
7 |
import os
|
8 |
from langchain.prompts import ChatPromptTemplate
|
9 |
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
|
|
|
14 |
|
15 |
# os.environ['TRANSFORMERS_CACHE'] = '/blabla/cache/'
|
16 |
|
17 |
+
# hf_llm = HuggingFaceHub(
|
18 |
+
# repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
|
19 |
+
# huggingfacehub_api_token=os.environ["HF_TOKEN"],
|
20 |
+
# task="text-generation",
|
21 |
+
# model_kwargs={"temperature":0.01, "max_new_tokens" : 250}
|
22 |
+
# )
|
23 |
+
|
24 |
+
hf_llm = HuggingFaceEndpoint(
|
25 |
+
endpoint_url="https://kw5c9k0dp4grnml0.us-east-1.aws.endpoints.huggingface.cloud",
|
26 |
huggingfacehub_api_token=os.environ["HF_TOKEN"],
|
27 |
task="text-generation",
|
28 |
+
# model_kwargs={"temperature":0.01, "max_new_tokens" : 250}
|
29 |
+
temperature= 0.01,
|
30 |
+
max_new_tokens = 250,
|
31 |
)
|
32 |
|
33 |
embedding_model_id = 'WhereIsAI/UAE-Large-V1'
|