jonathanjordan21 commited on
Commit
1fd682a
1 Parent(s): d7e0f2f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -3
app.py CHANGED
@@ -19,6 +19,9 @@ class InferencePost(BaseModel):
19
  question: str
20
  with_template: Union[str, None] = None
21
 
 
 
 
22
 
23
  API_TOKEN = os.environ['HF_API_KEY']
24
 
@@ -50,21 +53,21 @@ Kamu adalah Asisten AI yang dikembangkan oleh Jonthan Jordan. Answer strictly in
50
  llama = HuggingFaceEndpoint(
51
  repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
52
  task="text-generation",
53
- max_new_tokens=150,
54
  do_sample=False,
55
  )
56
 
57
  qwen = HuggingFaceEndpoint(
58
  repo_id="Qwen/Qwen1.5-4B-Chat",
59
  task="text-generation",
60
- max_new_tokens=150,
61
  do_sample=False,
62
  )
63
 
64
  qwen2 = HuggingFaceEndpoint(
65
  repo_id="Qwen/Qwen2-1.5B-Instruct",
66
  task="text-generation",
67
- max_new_tokens=150,
68
  do_sample=False,
69
  )
70
 
@@ -81,6 +84,19 @@ def greet_json():
81
  return {"Hello": "World!"}
82
 
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
@app.post("/conversation")
async def conversation(data : ConversationPost):
    """Answer a single question through the conversational chain ``llm``.

    The request body carries the question; the chain's reply is wrapped
    in an ``output`` key.  NOTE(review): ``llm`` and ``ConversationPost``
    are defined elsewhere in this module.
    """
    payload = {"question": data.question}
    return {"output": llm.invoke(payload)}
 
19
  question: str
20
  with_template: Union[str, None] = None
21
 
22
class LLMPost(BaseModel):
    """Request body for the model-selection chat endpoint."""

    # Name of the backend to dispatch to (e.g. 'llama' or 'qwen');
    # presumably any other value selects a default backend — verify
    # against the /chat handler.
    model: str
    # The user prompt, forwarded verbatim to the chosen model.
    question: str
25
 
26
  API_TOKEN = os.environ['HF_API_KEY']
27
 
 
53
def _make_endpoint(repo_id: str):
    """Build a text-generation HuggingFaceEndpoint with the shared settings.

    All three backends below use identical decoding parameters; factoring
    the construction out removes the triplicated call so a future change
    (e.g. to ``max_new_tokens``) happens in one place.
    """
    return HuggingFaceEndpoint(
        repo_id=repo_id,
        task="text-generation",
        max_new_tokens=4096,  # raised from 150 so long answers aren't truncated
        do_sample=False,      # greedy decoding -> deterministic output
    )


# Backends selectable by name via the /chat endpoint.
llama = _make_endpoint("meta-llama/Meta-Llama-3-8B-Instruct")
qwen = _make_endpoint("Qwen/Qwen1.5-4B-Chat")
qwen2 = _make_endpoint("Qwen/Qwen2-1.5B-Instruct")
73
 
 
84
  return {"Hello": "World!"}
85
 
86
 
87
+
88
@app.post("/chat")
async def chat(data: LLMPost):
    """Run a one-shot completion against the backend named in the request.

    ``data.model`` selects the endpoint: 'llama' or 'qwen' pick their
    respective models; any other value falls back to ``qwen2`` —
    identical to the original if/elif chain, expressed as a lookup.
    """
    backends = {"llama": llama, "qwen": qwen}
    selected = backends.get(data.model, qwen2)
    return {"data": selected.invoke(data.question)}
96
+
97
+
98
+
99
+
100
@app.post("/conversation")
async def conversation(data : ConversationPost):
    """Forward the question to the conversational chain and return its reply.

    NOTE(review): ``llm`` and ``ConversationPost`` are module-level names
    defined elsewhere in this file.
    """
    question = {"question": data.question}
    return {"output": llm.invoke(question)}