jonathanjordan21 committed
Commit d7e0f2f · verified · 1 Parent(s): a43c0c5

Update app.py

Files changed (1):
  1. app.py +14 -0
app.py CHANGED
@@ -61,10 +61,18 @@ qwen = HuggingFaceEndpoint(
     do_sample=False,
 )
 
+qwen2 = HuggingFaceEndpoint(
+    repo_id="Qwen/Qwen2-1.5B-Instruct",
+    task="text-generation",
+    max_new_tokens=150,
+    do_sample=False,
+)
+
 llm = prompt_qwen | qwen
 
 llm2 = prompt_llama | llama
 
+llm3 = prompt_qwen | qwen2
 # llm = prompt | CustomLLM(repo_id="Qwen/Qwen-VL-Chat", model_type='text-generation', api_token=API_TOKEN, max_new_tokens=150).bind(stop=['<|im_end|>'])
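For context, the hunk above adds a second Qwen endpoint and pipes it through the existing prompt. A minimal standalone sketch of the same pattern, assuming langchain-huggingface is installed, a Hugging Face token is set in the environment, and prompt_qwen is a single-variable PromptTemplate defined earlier in app.py (its template text below is an illustrative assumption, not shown in this diff):

from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint

# Assumed shape of the prompt defined earlier in app.py; the real template
# is not visible in this diff.
prompt_qwen = PromptTemplate.from_template(
    "<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n"
)

# Same construction as the commit: greedy decoding (do_sample=False),
# at most 150 new tokens, served by the HF Inference API. Authentication
# comes from the HUGGINGFACEHUB_API_TOKEN environment variable.
qwen2 = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen2-1.5B-Instruct",
    task="text-generation",
    max_new_tokens=150,
    do_sample=False,
)

# LCEL pipe: render the prompt from {"question": ...}, then call the endpoint.
llm3 = prompt_qwen | qwen2
print(llm3.invoke({"question": "What does do_sample=False change?"}))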
 
@@ -82,6 +90,10 @@ async def conversation(data : ConversationPost):
 async def conversation2(data : ConversationPost):
     return {"output":llm2.invoke({"question":data.question})}
 
+@app.post("/conversation3")
+async def conversation3(data : ConversationPost):
+    return {"output":llm3.invoke({"question":data.question})}
+
 
 @app.post("/inference")
 async def inference(data : InferencePost):
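The hunk above exposes llm3 over HTTP as a third conversation route. Assuming the app is served locally by uvicorn on its default port (deployment details are not part of this diff), a client call might look like:

import requests

# Hypothetical local URL; the real host/port depend on how app.py is launched.
resp = requests.post(
    "http://127.0.0.1:8000/conversation3",
    json={"question": "Summarize this commit in one sentence."},
)
print(resp.json()["output"])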
 
@@ -89,6 +101,8 @@ async def inference(data : InferencePost):
         out = llm2.invoke(data.question)
     elif data.with_template == 'qwen':
         out = llm.invoke(data.question)
+    elif data.with_template == 'qwen2':
+        out = llm3.invoke(data.question)
     else:
         out = llama.invoke(data.question)
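Finally, the /inference route gains a 'qwen2' branch in its with_template dispatch. If more chains are added later, the if/elif ladder could become a dict lookup; a sketch of that refactor (not part of this commit), assuming the first branch, which the diff truncates, checks for 'llama', and that the handler returns {"output": out} as the other routes do:

# Hypothetical refactor of the dispatch inside inference().
CHAINS = {
    "llama": llm2,  # assumed key for the branch not shown in the diff
    "qwen": llm,
    "qwen2": llm3,  # the chain added in this commit
}

@app.post("/inference")
async def inference(data: InferencePost):
    # Fall back to the raw llama endpoint when no known template is requested.
    chain = CHAINS.get(data.with_template, llama)
    return {"output": chain.invoke(data.question)}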