Leo Fan committed on
Commit
98b8cda
1 Parent(s): e388bef

Switched LLM to dedicated server.

app/__pycache__/server.cpython-311.pyc CHANGED
Binary files a/app/__pycache__/server.cpython-311.pyc and b/app/__pycache__/server.cpython-311.pyc differ
 
app/server.py CHANGED
@@ -3,6 +3,7 @@ from fastapi.responses import RedirectResponse
 from langserve import add_routes
 from langchain_community.vectorstores import FAISS
 from langchain_community.llms import HuggingFaceHub
+from langchain_community.llms import HuggingFaceEndpoint
 import os
 from langchain.prompts import ChatPromptTemplate
 from langchain_core.runnables import RunnablePassthrough, RunnableParallel
@@ -13,11 +14,20 @@ app = FastAPI()
 
 # os.environ['TRANSFORMERS_CACHE'] = '/blabla/cache/'
 
-hf_llm = HuggingFaceHub(
-    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
+# hf_llm = HuggingFaceHub(
+#     repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
+#     huggingfacehub_api_token=os.environ["HF_TOKEN"],
+#     task="text-generation",
+#     model_kwargs={"temperature":0.01, "max_new_tokens" : 250}
+# )
+
+hf_llm = HuggingFaceEndpoint(
+    endpoint_url="https://kw5c9k0dp4grnml0.us-east-1.aws.endpoints.huggingface.cloud",
     huggingfacehub_api_token=os.environ["HF_TOKEN"],
     task="text-generation",
-    model_kwargs={"temperature":0.01, "max_new_tokens" : 250}
+    # model_kwargs={"temperature":0.01, "max_new_tokens" : 250}
+    temperature=0.01,
+    max_new_tokens=250,
 )
 
 embedding_model_id = 'WhereIsAI/UAE-Large-V1'
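
For reference, a minimal sketch (not part of the commit) of how the new HuggingFaceEndpoint-backed LLM can be exercised once HF_TOKEN is set. The endpoint URL here is a placeholder, and the prompt text, question, and standalone script framing are illustrative assumptions; server.py itself wires hf_llm into its existing LangServe routes.

```python
import os

from langchain_community.llms import HuggingFaceEndpoint
from langchain.prompts import ChatPromptTemplate

# Dedicated Inference Endpoint client; the URL below is a placeholder,
# not the one from the commit.
hf_llm = HuggingFaceEndpoint(
    endpoint_url="https://<your-endpoint>.endpoints.huggingface.cloud",
    huggingfacehub_api_token=os.environ["HF_TOKEN"],
    task="text-generation",
    temperature=0.01,
    max_new_tokens=250,
)

# Illustrative prompt; the real app builds a RAG chain around a FAISS store.
prompt = ChatPromptTemplate.from_template("Answer briefly: {question}")
chain = prompt | hf_llm  # LCEL pipeline: prompt -> dedicated endpoint

print(chain.invoke({"question": "What is FAISS used for?"}))
```

Unlike HuggingFaceHub, HuggingFaceEndpoint takes sampling parameters such as temperature and max_new_tokens as top-level arguments, which is why the commit moves them out of model_kwargs.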