jonathanjordan21
committed on
Update app.py
app.py CHANGED
@@ -61,10 +61,18 @@ qwen = HuggingFaceEndpoint(
     do_sample=False,
 )
 
+qwen2 = HuggingFaceEndpoint(
+    repo_id="Qwen/Qwen2-1.5B-Instruct",
+    task="text-generation",
+    max_new_tokens=150,
+    do_sample=False,
+)
+
 llm = prompt_qwen | qwen
 
 llm2 = prompt_llama | llama
 
+llm3 = prompt_qwen | qwen2
 # llm = prompt | CustomLLM(repo_id="Qwen/Qwen-VL-Chat", model_type='text-generation', api_token=API_TOKEN, max_new_tokens=150).bind(stop=['<|im_end|>'])
 
 
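Note: the new llm3 chain reuses the existing prompt_qwen template and follows the same LCEL pattern as llm and llm2, a prompt piped into a HuggingFaceEndpoint. A minimal standalone sketch of that pattern, assuming the langchain-huggingface package and an HF token in the environment (prompt_qwen itself is not shown in this diff, so the template below is a placeholder):

    # Sketch only. The import path varies by LangChain version:
    # langchain_huggingface here, langchain_community.llms in older releases.
    from langchain_huggingface import HuggingFaceEndpoint
    from langchain_core.prompts import PromptTemplate

    qwen2 = HuggingFaceEndpoint(
        repo_id="Qwen/Qwen2-1.5B-Instruct",  # model id from the diff
        task="text-generation",
        max_new_tokens=150,
        do_sample=False,  # greedy decoding, as in the commit
    )

    # Placeholder; the real prompt_qwen is defined earlier in app.py.
    prompt_qwen = PromptTemplate.from_template("Question: {question}\nAnswer:")

    llm3 = prompt_qwen | qwen2  # LCEL pipe: formatted prompt feeds the endpoint
    print(llm3.invoke({"question": "What does the | operator build here?"}))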
@@ -82,6 +90,10 @@ async def conversation(data : ConversationPost):
 async def conversation2(data : ConversationPost):
     return {"output":llm2.invoke({"question":data.question})}
 
+@app.post("/conversation3")
+async def conversation3(data : ConversationPost):
+    return {"output":llm3.invoke({"question":data.question})}
+
 
 @app.post("/inference")
 async def inference(data : InferencePost):
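Note: /conversation3 mirrors /conversation2 but dispatches to the new llm3 chain. A hypothetical client call against the route, assuming a local run on the usual Spaces port (the base URL is an assumption, not part of the commit):

    import requests

    resp = requests.post(
        "http://localhost:7860/conversation3",  # assumed local base URL/port
        json={"question": "What is the capital of France?"},
    )
    print(resp.json()["output"])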
@@ -89,6 +101,8 @@ async def inference(data : InferencePost):
         out = llm2.invoke(data.question)
     elif data.with_template == 'qwen':
         out = llm.invoke(data.question)
+    elif data.with_template == 'qwen2':
+        out = llm3.invoke(data.question)
     else:
         out = llama.invoke(data.question)
 
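Note: with the extra elif, /inference routes with_template == 'qwen2' through the templated Qwen2 chain, while unrecognized values still fall through to the raw llama endpoint. A hypothetical request exercising the new branch (same assumed base URL as above):

    import requests

    resp = requests.post(
        "http://localhost:7860/inference",
        json={"question": "Hello!", "with_template": "qwen2"},
    )
    print(resp.json())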