File size: 2,773 Bytes
26ca9d4
9423469
 
4ef74d7
2493f19
4ef74d7
f00ccef
4ef74d7
f00ccef
 
 
 
628c773
26ca9d4
b3dfe3c
26ca9d4
f00ccef
291372b
26ca9d4
 
9423469
f00ccef
 
 
b3dfe3c
291372b
f00ccef
 
eb16940
 
 
9423469
 
73a2adf
9f21eff
77b7aca
 
 
 
 
 
 
 
9f21eff
cb6cb3b
3bae375
6ae7f5a
9f21eff
 
 
bd4890c
9f21eff
cb6cb3b
77b7aca
cb6cb3b
9f21eff
 
 
cb6cb3b
 
9f21eff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
628c773
9f21eff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73a2adf
628c773
9f21eff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# app.py
import os
import re
from threading import Thread

import gradio as gr
import torch
from openai import OpenAI
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

# SECURITY: an API key was previously hardcoded here and is therefore leaked in
# version control — it should be rotated. Prefer the BAILING_API_KEY environment
# variable; the old literal remains only as a backward-compatible fallback.
client = OpenAI(
    api_key=os.environ.get("BAILING_API_KEY", "sk-420ab66020704eabbe37501ec39b7a2b"),
    base_url="https://bailingchat.alipay.com",
)

# define chat function
def chat(user_input, max_tokens=11264):
    """Send *user_input* to the Ling chat-completion API and yield the reply.

    Written as a generator so Gradio treats it as a (single-chunk) streaming
    handler; exactly one string is yielded per call.

    Args:
        user_input: The user's question / prompt text.
        max_tokens: Upper bound on the number of tokens the model may generate.

    Yields:
        str: The full response text returned by the model.
    """
    # Single-turn conversation: no chat history is carried between calls.
    messages = [
        {"role": "user", "content": user_input}
    ]

    # Near-zero temperature with top_p=1 makes answers close to deterministic.
    response = client.chat.completions.create(
        model="Ling-lite-1.5-250604",
        messages=messages,
        max_tokens=max_tokens,
        temperature=0.01,
        top_p=1,
    )
    # (Removed a leftover debug print of the raw response text.)
    yield response.choices[0].message.content



# Create a custom layout using Blocks.
# The CSS pins the Markdown output pane to a fixed, scrollable height so long
# model replies do not stretch the whole page.
with gr.Blocks(css="""
    #markdown-output {
        height: 300px;
        overflow-y: auto;
        border: 1px solid #ddd;
        padding: 10px;
    }
""") as demo:
    gr.Markdown(
        "## Ling-lite-1.5 AI Assistant\n"
        "Based on [inclusionAI/Ling-lite-1.5](https://huggingface.co/inclusionAI/Ling-lite-1.5)\n"
    )

    with gr.Row():
        # NOTE(review): the slider maximum (10000) is below chat()'s default
        # max_tokens (11264) — confirm which limit is actually intended.
        max_tokens_slider = gr.Slider(minimum=5000, maximum=10000, step=100, label="Generated length")

    # Markdown output pane; elem_id ties it to the CSS block above.
    output_box = gr.Markdown(label="Response", elem_id="markdown-output")
    # Fixed label typo: "Input you question" -> "Input your question".
    input_box = gr.Textbox(lines=8, label="Input your question")

    # Clickable example prompts that populate the input box.
    examples = gr.Examples(
        examples=[
            ["Introducing the basic concepts of large language models"],
            ["How to solve long context dependencies in math problems?"]
        ],
        inputs=input_box
    )

    # Embedding gr.Interface inside Blocks wires input_box + slider -> chat
    # -> output_box and renders the submit/clear controls.
    interface = gr.Interface(
        fn=chat,
        inputs=[input_box, max_tokens_slider],
        outputs=output_box,
        live=False  # disable auto-triggering on input change
    )

# launch Gradio Service
# queue() enables request queuing, required for generator (streaming) handlers.
demo.queue()
demo.launch()

# Construct Gradio Interface
#interface = gr.Interface(
#    fn=chat,
#    inputs=[
#        gr.Textbox(lines=8, label="输入你的问题"),
#        gr.Slider(minimum=100, maximum=102400, step=50, label="生成长度")
#    ],
#    outputs=[
#        gr.Textbox(lines=8, label="模型回复")
#    ],
#    title="Ling-lite-1.5 AI助手",
#    description="基于 [inclusionAI/Ling-lite-1.5](https://huggingface.co/inclusionAI/Ling-lite-1.5)  的对话式文本生成演示。",
#    examples=[
#        ["介绍大型语言模型的基本概念"],
#        ["如何解决数学问题中的长上下文依赖?"]
#    ]
#)

# launch Gradio Service
#interface.launch()