- app/gen/openllm.py +46 -0
- app/main.py +2 -51
- app/ui.py +13 -0
app/gen/openllm.py
ADDED
@@ -0,0 +1,46 @@
+from openai import AsyncOpenAI
+
+base_url = "http://127.0.0.1:8080/v1"
+client = AsyncOpenAI(base_url=base_url, api_key="-")  # TGI does not check the key, but the client requires one
+
+def _default_parameters():
+    """
+    frequency_penalty: Optional[float] = None,
+    logit_bias: Optional[List[float]] = None,
+    logprobs: Optional[bool] = None,
+    top_logprobs: Optional[int] = None,
+    max_tokens: Optional[int] = None,
+    n: Optional[int] = None,
+    presence_penalty: Optional[float] = None,
+    stream: bool = False,
+    seed: Optional[int] = None,
+    temperature: Optional[float] = None,
+    top_p: Optional[float] = None,
+    tools: Optional[List[Tool]] = None,
+    tool_choice: Optional[str] = None,
+    """
+    return {
+        "max_tokens": 256,
+        "stream": True,
+        "temperature": 0.9,
+    }
+
+def translate_messages(history):
+    messages = []
+
+    for conv in history:  # each history entry is a (user, assistant) pair
+        messages.append({"role": "user", "content": conv[0]})
+        messages.append({"role": "assistant", "content": conv[1]})
+
+    return messages
+
+async def chat(messages, parameters=None):
+    if parameters is None:
+        parameters = _default_parameters()
+
+    responses = await client.chat.completions.create(
+        model="tgi", messages=messages, **parameters
+    )
+
+    async for resp in responses:
+        yield resp.choices[0].delta.content or ""  # the final streamed chunk's delta can be None
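For a quick sanity check of the new module, a minimal driver might look like the sketch below. It assumes a TGI server is already serving the OpenAI-compatible API at base_url; the history tuple and prompt are made up for illustration.

# Hypothetical smoke test for app/gen/openllm.py; needs a running TGI server
# at http://127.0.0.1:8080. History and prompt below are illustrative only.
import asyncio

from app.gen.openllm import chat, translate_messages

async def smoke_test():
    history = [("hello", "Hi there! How can I help?")]
    messages = translate_messages(history)
    messages.append({"role": "user", "content": "What is a Large Language Model?"})

    # chat() is an async generator yielding one text delta per streamed chunk
    async for token in chat(messages):
        print(token, end="", flush=True)
    print()

asyncio.run(smoke_test())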
app/main.py
CHANGED
@@ -1,59 +1,10 @@
 import argparse
 import gradio as gr
-from openai import AsyncOpenAI
-
-base_url = "http://127.0.0.1:8080/v1"
-client = AsyncOpenAI(base_url=base_url, api_key="-")
-
-"""
-frequency_penalty: Optional[float] = None,
-logit_bias: Optional[List[float]] = None,
-logprobs: Optional[bool] = None,
-top_logprobs: Optional[int] = None,
-max_tokens: Optional[int] = None,
-n: Optional[int] = None,
-presence_penalty: Optional[float] = None,
-stream: bool = False,
-seed: Optional[int] = None,
-temperature: Optional[float] = None,
-top_p: Optional[float] = None,
-tools: Optional[List[Tool]] = None,
-tool_choice: Optional[str] = None,
-"""
-
-def _default_parameters():
-    return {
-        "max_tokens": 256,
-        "stream": True,
-        "temperature": 0.9,
-    }
-
-def _translate_messages(history):
-    messages = []
-
-    for conv in history:
-        messages.append({"role":"user", "content":conv[0]})
-        messages.append({"role":"assistant", "content":conv[1]})
-
-    return messages
-
-async def echo(message, history):
-    parameters = _default_parameters()
-    messages = _translate_messages(history)
-    messages.append({"role":"user", "content":message})
-
-    responses = await client.chat.completions.create(
-        model="tgi", messages=messages, **parameters
-    )
-
-    full_resp = ""
-    async for resp in responses:
-        full_resp = full_resp + resp.choices[0].delta.content
-        yield full_resp
+from app.ui import chat
 
 def main(args):
     demo = gr.ChatInterface(
-        fn=echo,
+        fn=chat,
         examples=["hello", "how are you?", "What is Large Language Model?"],
         title="Space of Gradio ➕ Text Generation Inference",
         multimodal=False
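The hunk ends inside the gr.ChatInterface(...) call, so the rest of main (presumably closing the call and launching the demo) and the argparse entry point are outside the diff. Purely as an assumption, the surrounding code might take this shape:

# Hypothetical completion of app/main.py; the real wiring is not in this hunk.
    )
    demo.launch()

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    main(parser.parse_args())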
app/ui.py
ADDED
@@ -0,0 +1,13 @@
+from app.gen.openllm import (
+    chat as stream_chat,  # aliased: the chat() defined below would otherwise shadow the import
+    translate_messages,
+)
+
+async def chat(message, history):
+    messages = translate_messages(history)
+    messages.append({"role": "user", "content": message})
+
+    full_resp = ""
+    async for resp in stream_chat(messages):
+        full_resp = full_resp + resp
+        yield full_resp  # Gradio expects the full reply so far, not just the delta
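The accumulation loop can be verified without a live server by stubbing out the backend generator. The sketch below monkeypatches the module-level stream_chat reference; the stub tokens are made up.

# Hypothetical offline check of app/ui.py's accumulation loop: swap the real
# backend generator for a stub so no TGI server is needed.
import asyncio

import app.ui as ui

async def fake_stream(messages):
    for tok in ["Hel", "lo", "!"]:
        yield tok

async def run_check():
    ui.stream_chat = fake_stream  # rebind the backend looked up at call time
    outputs = [chunk async for chunk in ui.chat("hi", [])]
    assert outputs == ["Hel", "Hello", "Hello!"]

asyncio.run(run_check())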