Update main.py
main.py
CHANGED
@@ -435,19 +435,18 @@ async def start():
     from transformers import pipeline
     from langchain_huggingface.llms import HuggingFacePipeline
     repo_id = "meta-llama/Llama-3.2-3B-Instruct"
-    pipe = pipeline(
-        "text-generation",
-        model=repo_id,
-        torch_dtype=torch.bfloat16,
-        device_map="auto",
-    )
     #model = HuggingFaceEndpoint(
     #    repo_id=repo_id,
     #    max_new_tokens=6000,
     #    temperature=1.0,
     #    streaming=True
     #)
-    model = HuggingFacePipeline(
+    model = HuggingFacePipeline.from_model_id(
+        model_id=repo_id,
+        task="text-generation",
+        device_map="auto",
+        pipeline_kwargs={"max_new_tokens": 6000},
+    )
     if not cl.user_session.get("saveMemory"):
         cl.user_session.set("saveMemory", "")
         cl.user_session.set("memory", ConversationBufferMemory(return_messages=True))
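For readers following along outside the diff, here is the new call site as a standalone sketch. It assumes langchain_huggingface and transformers are installed and that the gated meta-llama/Llama-3.2-3B-Instruct checkpoint is accessible; the final invoke() call is illustrative usage, not part of the commit.

    # Minimal standalone sketch of the new model setup (assumptions noted above).
    from langchain_huggingface.llms import HuggingFacePipeline

    repo_id = "meta-llama/Llama-3.2-3B-Instruct"

    # from_model_id builds the transformers pipeline internally, which is why
    # the explicit pipeline(...) call could be dropped in this commit.
    model = HuggingFacePipeline.from_model_id(
        model_id=repo_id,
        task="text-generation",
        device_map="auto",                         # place weights automatically
        pipeline_kwargs={"max_new_tokens": 6000},  # forwarded to pipeline(...)
    )

    # Illustrative only: HuggingFacePipeline is a LangChain LLM, so invoke()
    # takes a prompt string and returns the generated text as a string.
    print(model.invoke("Hello!"))

One behavioral difference worth flagging: the deleted pipeline(...) call passed torch_dtype=torch.bfloat16, and the new from_model_id(...) call has no equivalent argument, so the model presumably loads in its default dtype. If bfloat16 is still wanted, from_model_id accepts a model_kwargs dict where it could be restored.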