Leo Fan committed on
Commit
98b8cda
1 Parent(s): e388bef

Switched LLM to dedicated server.

app/__pycache__/server.cpython-311.pyc CHANGED
Binary files a/app/__pycache__/server.cpython-311.pyc and b/app/__pycache__/server.cpython-311.pyc differ
 
app/server.py CHANGED
@@ -3,6 +3,7 @@ from fastapi.responses import RedirectResponse
 from langserve import add_routes
 from langchain_community.vectorstores import FAISS
 from langchain_community.llms import HuggingFaceHub
+from langchain_community.llms import HuggingFaceEndpoint
 import os
 from langchain.prompts import ChatPromptTemplate
 from langchain_core.runnables import RunnablePassthrough, RunnableParallel
@@ -13,11 +14,20 @@ app = FastAPI()
 
 # os.environ['TRANSFORMERS_CACHE'] = '/blabla/cache/'
 
-hf_llm = HuggingFaceHub(
-    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
+# hf_llm = HuggingFaceHub(
+#     repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
+#     huggingfacehub_api_token=os.environ["HF_TOKEN"],
+#     task="text-generation",
+#     model_kwargs={"temperature":0.01, "max_new_tokens" : 250}
+# )
+
+hf_llm = HuggingFaceEndpoint(
+    endpoint_url="https://kw5c9k0dp4grnml0.us-east-1.aws.endpoints.huggingface.cloud",
     huggingfacehub_api_token=os.environ["HF_TOKEN"],
     task="text-generation",
-    model_kwargs={"temperature":0.01, "max_new_tokens" : 250}
+    # model_kwargs={"temperature":0.01, "max_new_tokens" : 250}
+    temperature=0.01,
+    max_new_tokens=250,
 )
 
 embedding_model_id = 'WhereIsAI/UAE-Large-V1'
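
For reference, a minimal sketch (not part of the commit) of how the new HuggingFaceEndpoint-backed LLM can be exercised once HF_TOKEN is set. The endpoint URL here is a placeholder, and the prompt text, question, and standalone script framing are illustrative assumptions; server.py itself wires hf_llm into its existing LangServe routes.

```python
import os

from langchain_community.llms import HuggingFaceEndpoint
from langchain.prompts import ChatPromptTemplate

# Dedicated Inference Endpoint client; the URL below is a placeholder,
# not the one from the commit.
hf_llm = HuggingFaceEndpoint(
    endpoint_url="https://<your-endpoint>.endpoints.huggingface.cloud",
    huggingfacehub_api_token=os.environ["HF_TOKEN"],
    task="text-generation",
    temperature=0.01,
    max_new_tokens=250,
)

# Illustrative prompt; the real app builds a RAG chain around a FAISS store.
prompt = ChatPromptTemplate.from_template("Answer briefly: {question}")
chain = prompt | hf_llm  # LCEL pipeline: prompt -> dedicated endpoint

print(chain.invoke({"question": "What is FAISS used for?"}))
```

Unlike HuggingFaceHub, HuggingFaceEndpoint takes sampling parameters such as temperature and max_new_tokens as top-level arguments, which is why the commit moves them out of model_kwargs.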