Spaces:

THUDM
/

GLM-130B

Runtime error

File size: 6,376 Bytes

07f3d5b
 
 
 
 
 
2807c72
07f3d5b
 
 
 
 
 
 
 
6a7022c
 
 
5f01803
d7dfd61
07f3d5b
 
 
5f01803
1ae4c5f
07f3d5b
 
 
 
 
 
 
 
bd9c730
a47f8a0
b387589
bd9c730
07f3d5b
 
 
5f01803
07f3d5b
 
5ee2bac
 
 
7d77191
38e35dd
c541ed9
 
21443c3
6a7022c
77389b9
8e90038
 
77389b9
510389c
8e90038
07f3d5b
 
 
d4b9a9c
 
aa0674a
d4b9a9c
f84a603
ff186a2
601d175
d4b9a9c
601d175
d4b9a9c
07f3d5b
 
 
a0466df
6f78015
4207e1a
45db462
4207e1a
 
a0466df
07f3d5b
 
 
 
 
 
 
 
4a90639
4bb8e69
 
90152c0
 
 
 
4bb8e69
 
4a90639
 
6ee4f1f
 
dc9694c
07f3d5b
4bb8e69
 
07f3d5b
4bb8e69
 
81db43f
4bb8e69
4d683c9
81db43f
07f3d5b
4bb8e69
07f3d5b
4bb8e69
 
81db43f
4bb8e69
a1b3fa3
1a05abf
a1b3fa3
07f3d5b
 
 
 
d4b9a9c
38ab96b
 
1df7a23
86cd53a
38ab96b
 
230a27d
86cd53a
07f3d5b

import gradio as gr
import requests

import json
import os


# Credentials sent with every completion request made by predict().
# Both are read from the process environment once at import time;
# os.environ.get() yields None for a variable that is not set.
APIKEY = os.environ.get("APIKEY")
APISECRET = os.environ.get("APISECRET")

def predict(text, seed, out_seq_length, min_gen_length, sampling_strategy,
            num_beams, length_penalty, no_repeat_ngram_size,
            temperature, topk, topp):
    """Request a completion for ``text`` from the remote GLM-130B API.

    The function never raises for network or response problems: every
    failure is turned into a short human-readable string, because the
    return value is what the caller displays to the user.

    Args:
        text: Prompt to complete. ``None``, empty or whitespace-only input
            is rejected locally without contacting the server.
        seed: Sent as ``"seed"``.
        out_seq_length: Sent as ``"max_tokens"``.
        min_gen_length: Sent as ``"min_gen_length"``.
        sampling_strategy: Sent as ``"sampling_strategy"``.
        num_beams: Sent as ``"num_beams"``.
        length_penalty: Sent as ``"length_penalty"``.
        no_repeat_ngram_size: Sent as ``"no_repeat_ngram"``.
        temperature: Sent as ``"temperature"``.
        topk: Sent as ``"top_k"``.
        topp: Sent as ``"top_p"``.

    Returns:
        The generated text with every literal ``[</s>]`` removed, the
        server-provided error message when the reply has ``status == 1``,
        or a local error message when the request or its reply is unusable.
    """
    invalid_reply = 'Invalid response from server! Please wait a few minutes and retry'

    # Reject blank prompts up front; there is nothing useful to complete.
    if text is None or not text.strip():
        return 'Input should not be empty!'

    url = 'https://models.aminer.cn/os/api/api/v2/completions_130B'

    # APIKEY / APISECRET are module-level values that are only read here,
    # so no ``global`` declaration is needed.
    payload = json.dumps({
        "apikey": APIKEY,
        "apisecret": APISECRET,
        "model_name": "glm-130b-v1",
        "prompt": text,
        "length_penalty": length_penalty,
        "temperature": temperature,
        "top_k": topk,
        "top_p": topp,
        "min_gen_length": min_gen_length,
        "sampling_strategy": sampling_strategy,
        "num_beams": num_beams,
        "max_tokens": out_seq_length,
        "no_repeat_ngram": no_repeat_ngram_size,
        "quantization": "int4",
        "seed": seed,
    })

    headers = {
        'Content-Type': 'application/json',
    }

    try:
        # timeout=(connect, read) in seconds.
        http_response = requests.request(
            "POST", url, headers=headers, data=payload, timeout=(20, 100))
    except requests.exceptions.Timeout:
        return 'Timeout! Please wait a few minutes and retry'
    except requests.exceptions.RequestException:
        # Connection refused, DNS failure, TLS error, ... -- not a timeout.
        return 'Request failed! Please wait a few minutes and retry'

    try:
        response = http_response.json()
    except ValueError:
        # Body was not JSON (every JSON decode error subclasses ValueError).
        return invalid_reply

    if not isinstance(response, dict):
        return invalid_reply

    # status == 1 marks an error reply whose text lives in message.errmsg.
    if response.get('status') == 1:
        message = response.get('message')
        if isinstance(message, dict) and 'errmsg' in message:
            return message['errmsg']
        return invalid_reply

    try:
        answer = response['result']['output']['raw']
        # "raw" may be a list of candidates; only the first one is shown.
        if isinstance(answer, list):
            answer = answer[0]
    except (KeyError, IndexError, TypeError):
        return invalid_reply

    if not isinstance(answer, str):
        return invalid_reply

    # Strip the "[</s>]" marker from the generated text
    # (presumably the model's end-of-sequence token).
    return answer.replace('[</s>]', '')


if __name__ == "__main__":
    # Build and serve the Gradio demo UI. Components are created in the
    # order they should appear on the page: Blocks lays them out in
    # creation order inside the enclosing Row/Column context managers.

    # Example prompts. Each example is a one-element list because
    # gr.Examples takes one value per input component and there is a
    # single input (the prompt textbox).
    en_fil = ['The Starry Night is an oil-on-canvas painting by [MASK] in June 1889.']
    # Alternative generation prompt: 'Eight planets in solar system are [gMASK]'
    en_gen = ['Question: The best winter resort destination? Answer: [gMASK]']
    ch_fil = ['凯旋门位于意大利米兰市古城堡旁。1807年为纪念[MASK]而建，门高25米，顶上矗立两武士青铜古兵车铸像。']
    ch_gen = ['三亚位于海南岛的最南端,是中国最南部的热带滨海旅游城市 [gMASK]']
    en_to_ch = ['Pencil in Chinese is [MASK].']
    ch_to_en = ['"我思故我在"的英文是"[MASK]"。']

    examples = [en_fil, en_gen, ch_fil, ch_gen, en_to_ch, ch_to_en]

    with gr.Blocks() as demo:
        # Introductory text shown at the top of the page.
        gr.Markdown(
            """
            Dear friends,
            
            Nice to meet you here! This is a toy demo of GLM-130B, an open bilingual pre-trained model from Tsinghua University. GLM-130B uses two different mask tokens: `[MASK]` for short blank filling and `[gMASK]` for left-to-right long text generation. When the input does not contain any MASK token, `[gMASK]` will be automatically appended to the end of the text. We recommend that you use `[MASK]` to try text fill-in-the-blank to reduce wait time (ideally within seconds without queuing).
            
            This demo is a raw language model **without** instruction fine-tuning (which is applied to FLAN-* series) and RLHF (which is applied to ChatGPT); its ability is roughly between OpenAI `davinci` and `text-davinci-001`. Thus, it is currently worse than ChatGPT and other instruction fine-tuned models :(
            
            However, we are sparing no effort to improve it, and its updated versions will meet you soon! If you find the open-source effort useful, please star our [GitHub repo](https://github.com/THUDM/GLM-130B) to encourage our following development :)
            """)

        # Prompt column (textbox plus action buttons) next to the output box.
        with gr.Row():
            with gr.Column():
                model_input = gr.Textbox(lines=7, placeholder='Input something in English or Chinese', label='Input')
                with gr.Row():
                    gen = gr.Button("Generate")
                    clr = gr.Button("Clear")

            outputs = gr.Textbox(lines=7, label='Output')

        gr.Markdown(
            """
            Generation Parameter
            """)
        # Parameters that are sent regardless of the chosen strategy.
        with gr.Row():
            with gr.Column():
                seed = gr.Slider(maximum=100000, value=1234, step=1, label='Seed')
                out_seq_length = gr.Slider(maximum=256, value=128, minimum=32, step=1, label='Output Sequence Length')
            with gr.Column():
                min_gen_length = gr.Slider(maximum=64, value=0, step=1, label='Min Generate Length')
                sampling_strategy = gr.Radio(choices=['BeamSearchStrategy', 'BaseStrategy'], value='BaseStrategy', label='Search Strategy')

        with gr.Row():
            with gr.Column():
                # Controls grouped under the "BeamSearchStrategy" heading.
                gr.Markdown(
                    """
                    BeamSearchStrategy
                    """)
                num_beams = gr.Slider(maximum=4, value=2, minimum=1, step=1, label='Number of Beams')
                length_penalty = gr.Slider(maximum=1, value=1, minimum=0, label='Length Penalty')
                no_repeat_ngram_size = gr.Slider(maximum=5, value=3, minimum=1, step=1, label='No Repeat Ngram Size')
            with gr.Column():
                # Controls grouped under the "BaseStrategy" heading.
                gr.Markdown(
                    """
                    BaseStrategy
                    """)
                temperature = gr.Slider(maximum=1, value=1.0, minimum=0, label='Temperature')
                topk = gr.Slider(maximum=40, value=0, minimum=0, step=1, label='Top K')
                topp = gr.Slider(maximum=1, value=0.7, minimum=0, label='Top P')

        # The order of this list must match predict()'s positional parameters.
        inputs = [model_input, seed, out_seq_length, min_gen_length, sampling_strategy, num_beams, length_penalty, no_repeat_ngram_size, temperature, topk, topp]
        gen.click(fn=predict, inputs=inputs, outputs=outputs)
        # Clearing needs no input value; it only resets the prompt textbox.
        clr.click(fn=lambda: gr.update(value=""), inputs=None, outputs=model_input)

        # Clickable example prompts that fill the prompt textbox.
        gr.Examples(examples=examples, inputs=model_input)

        gr.Markdown(
            """
            Disclaimer inspired from [BLOOM](https://huggingface.co/spaces/bigscience/bloom-book)
            
            GLM-130B was trained on web-crawled data, so it's hard to predict how GLM-130B will respond to particular prompts; harmful or otherwise offensive content may occur without warning. We prohibit users from knowingly generating or allowing others to knowingly generate harmful content, including Hateful, Harassment, Violence, Adult, Political, Deception, etc. 
            """)

    demo.launch()