File size: 2,945 Bytes
95167e7
 
 
 
c02118d
95167e7
 
 
 
 
80dce94
 
7f42d00
6e243f5
b09a184
 
95167e7
 
 
 
 
 
 
 
 
44bd5ae
95167e7
44bd5ae
95167e7
44bd5ae
95167e7
 
 
 
 
 
 
 
 
 
 
 
 
44bd5ae
0299602
95167e7
 
a52e921
95167e7
 
 
c6a6c04
95167e7
 
 
e458a61
95167e7
 
 
 
 
44bd5ae
b14f3c1
95167e7
 
 
 
 
44bd5ae
95167e7
39f1f96
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import gradio as gr
from huggingface_hub import Repository, InferenceClient
import os
import json
import re

# Hugging Face Inference API bearer token, supplied via the environment.
API_TOKEN = os.environ.get("API_TOKEN")

# Display name -> hosted model repository id.
API_ENDPOINTS = {
    "Falcon": "tiiuae/falcon-180B-chat",
    "Llama": "meta-llama/Llama-2-70b-chat-hf",
    "Mistral": "mistralai/Mistral-7B-v0.1",
    "Mistral*": "mistralai/Mistral-7B-Instruct-v0.1",
    "Open-3.5": "openchat/openchat_3.5",
    "Xistral": "mistralai/Mixtral-8x7B-v0.1",
    "Xistral*": "mistralai/Mixtral-8x7B-Instruct-v0.1",
}

# Dropdown choices mirror the endpoint table's keys, in declaration order.
CHOICES = list(API_ENDPOINTS)

# One authenticated client per model, keyed by its display name.
CLIENTS = {
    name: InferenceClient(endpoint, headers = { "Authorization": f"Bearer {API_TOKEN}" })
    for name, endpoint in API_ENDPOINTS.items()
}

def predict(input, model, temperature, top_p, top_k, rep_p, max_tokens, stop_seqs, seed):
    stops = json.loads(stop_seqs)
   
    response = CLIENTS[model].text_generation(
        input,
        temperature = temperature,
        max_new_tokens = max_tokens,
        top_p = top_p,
        top_k = top_k,
        repetition_penalty = rep_p,
        stop_sequences = stops,
        do_sample = True,
        seed = seed,
        stream = False,
        details = False,
        return_full_text = False
    )

    return response

with gr.Blocks() as demo:
    # Header banner.
    with gr.Row(variant="panel"):
        gr.Markdown("✡️ This is a private LLM Space owned within STC Holdings!")

    with gr.Row():
        # Left column: prompt entry plus the run trigger.
        with gr.Column():
            input = gr.Textbox(label="Input", value="", lines=2)
            run = gr.Button("▶")

        # Right column: model selection and sampling controls, in the same
        # order as predict()'s parameters.
        with gr.Column():
            model = gr.Dropdown(
                choices=CHOICES,
                value=next(iter(API_ENDPOINTS)),
                interactive=True,
                label="Model",
            )
            temperature = gr.Slider(minimum=0, maximum=2, value=1, step=0.01, interactive=True, label="Temperature")
            top_p = gr.Slider(minimum=0.01, maximum=0.99, value=0.95, step=0.01, interactive=True, label="Top P")
            top_k = gr.Slider(minimum=1, maximum=2048, value=50, step=1, interactive=True, label="Top K")
            rep_p = gr.Slider(minimum=0.01, maximum=2, value=1.2, step=0.01, interactive=True, label="Repetition Penalty")
            max_tokens = gr.Slider(minimum=1, maximum=2048, value=32, step=64, interactive=True, label="Max New Tokens")
            stop_seqs = gr.Textbox(value="", interactive=True, label="Stop Sequences ( JSON Array / 4 Max )")
            seed = gr.Slider(minimum=0, maximum=9007199254740991, value=42, step=1, interactive=True, label="Seed")

    # Output pane.
    with gr.Row():
        with gr.Column():
            output = gr.Textbox(label="Output", value="", lines=50)

    # Wire the button to the inference call; queue=False runs it directly.
    run.click(
        predict,
        inputs=[input, model, temperature, top_p, top_k, rep_p, max_tokens, stop_seqs, seed],
        outputs=[output],
        queue=False,
    )

demo.launch(show_api=True)