File size: 10,056 Bytes
9e1636e
 
 
 
 
 
 
 
bc08d80
9b436aa
9e1636e
 
982cf4e
 
9e1636e
982cf4e
9e1636e
 
 
 
70507e7
09896d5
8c9d1a2
e9d94bd
 
d60776f
70507e7
5fb9f60
7d54a11
d8ca67b
ee61882
3736c10
 
 
7d54a11
3736c10
 
7ef6710
467715f
3242987
5fb9f60
7d54a11
7ef6710
f50ab84
6c702e3
9e1636e
982cf4e
 
72b2420
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e1636e
72b2420
9e1636e
 
 
 
 
 
43c5b06
9e1636e
 
ed36b09
43c5b06
 
9e1636e
43c5b06
9e1636e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43c5b06
9e1636e
 
 
 
 
 
 
43c5b06
 
9e1636e
0fb8589
 
 
 
 
d1cc2c0
982cf4e
 
 
 
 
 
06d3d78
982cf4e
9e1636e
982cf4e
 
1b39c53
 
 
982cf4e
9e1636e
 
 
d6acc70
84120b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2623ed
84120b5
a2623ed
84120b5
 
 
 
 
 
 
f2b1918
 
84120b5
 
 
 
65b131b
84120b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317583d
 
d6acc70
 
 
0718219
317583d
d6acc70
 
 
84120b5
9e1636e
 
 
 
 
 
 
cf25f9b
9e1636e
 
 
 
 
 
84120b5
9e1636e
 
d1cc2c0
 
9e1636e
84120b5
9e1636e
 
 
 
84120b5
9e1636e
 
 
84120b5
9e1636e
 
 
 
 
 
 
 
0fb8589
 
 
 
9e1636e
 
f9856aa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import gradio as gr
#from transformers import pipeline 
import torch
from utils import *
from presets import *
from huggingface_hub import login
from transformers import LlamaForCausalLM, LlamaTokenizer 

#antwort=""
######################################################################
# Models and tokenizers

# Option: use HuggingChat
# Create a chatbot connection
#chatbot = hugchat.ChatBot(cookie_path="cookies.json")

# Alternatively, pick any model below (exactly one base_model line should be active):
base_model = "project-baize/baize-v2-7b"  #load_8bit = False (in load_tokenizer_and_model)  
#base_model = "MAGAer13/mPLUG-Owl"  #load_8bit = False (in load_tokenizer_and_model)
#base_model = "alexkueck/li-tis-tuned-2"  #load_8bit = False (in load_tokenizer_and_model)
#base_model = "TheBloke/airoboros-13B-HF"  #load_8bit = False (in load_tokenizer_and_model)
#base_model = "EleutherAI/gpt-neo-1.3B"    #load_8bit = False (in load_tokenizer_and_model)
#base_model = "TheBloke/airoboros-13B-HF"   #load_8bit = True
#base_model = "TheBloke/vicuna-13B-1.1-HF"   #load_8bit = ?
# The following run only after a GPU upgrade
#base_model = "TheBloke/airoboros-65B-gpt4-1.3-GPTQ"  #model_basename = "airoboros-65b-gpt4-1.3-GPTQ-4bit--1g.act.order"
#base_model = "lmsys/vicuna-13b-v1.3"
#base_model = "gpt2-xl"   # options: ['gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl']

####################################
# Load model and tokenizer.
# The second positional argument is False here; per the notes next to the model
# names above it presumably is the load_8bit switch — confirm in utils.
tokenizer,model,device = load_tokenizer_and_model(base_model,False)

################################
# Alternative: model and tokenizer for GPT-2
#tokenizer,model,device = load_tokenizer_and_model_gpt2(base_model,False)

# Alternative: TheBloke GPTQ models - only with a GPU upgrade
#tokenizer,model,device = load_tokenizer_and_model_bloke_gpt(base_model, "airoboros-65b-gpt4-1.3-GPTQ-4bit--1g.act.order")
  
# Alternative: load model and tokenizer for Baize
#tokenizer,model,device = load_tokenizer_and_model_Baize(base_model,False)

########################################################################
# Use the chat AI to generate text...
def predict(text,
            chatbotGr,
            history,
            top_p,
            temperature,
            max_length_tokens,
            max_context_length_tokens,):
    """Stream the model's reply to ``text`` as a generator for the Gradio UI.

    Every yielded item is a 3-tuple ``(chat_pairs, history, status)``:
    the rows to show in the chatbot, the updated raw history and a short
    status message.

    Args:
        text: The new user message. An empty string ends immediately with
            the status "Empty context.".
        chatbotGr: Current chatbot value; yielded back unchanged whenever no
            new reply text is available.
        history: List of ``[user_text, reply_text]`` pairs of earlier turns.
        top_p: Passed through to ``greedy_search``.
        temperature: Passed through to ``greedy_search``.
        max_length_tokens: Passed to ``greedy_search`` as ``max_length``.
        max_context_length_tokens: Passed to ``generate_prompt_with_history``
            as ``max_length`` and used to keep only the last N input ids.

    Yields:
        ``(chat_pairs, history, status)`` tuples; ``status`` is one of
        "Empty context.", "No Model Found", "Input too long.",
        "Generating...", "Stop: Success" or "Generate: Success".
    """
    # Imported locally so the cleanup below does not depend on `gc` happening
    # to be re-exported by one of the star imports at the top of the file.
    import gc

    # Markers that separate the turns in the prompt; they end a reply.
    stop_markers = ("[|Human|]", "[|AI|]")

    if text == "":
        yield chatbotGr, history, "Empty context."
        return
    try:
        model
    except NameError:
        # Only a missing global counts as "no model"; anything else should surface.
        yield [[text, "No Model Found"]], [], "No Model Found"
        return

    inputs = generate_prompt_with_history(text, history, tokenizer, max_length=max_context_length_tokens)
    if inputs is None:
        yield chatbotGr, history, "Input too long."
        return
    _, inputs = inputs

    input_ids = inputs["input_ids"][:, -max_context_length_tokens:].to(device)
    torch.cuda.empty_cache()

    # Fallback values so "Stop"/"Success" can always be reported, even when
    # no displayable text was produced before the loop ended or was interrupted.
    a, b = chatbotGr, history

    try:
        # torch.no_grad(): no gradients are tracked for the tensors involved.
        # The network is only used for inference here, so backprop is not needed.
        with torch.no_grad():
            for x in greedy_search(input_ids, model, tokenizer, stop_words=list(stop_markers), max_length=max_length_tokens, temperature=temperature, top_p=top_p):
                if is_stop_word_or_prefix(x, list(stop_markers)) is False:
                    # Cut the partial reply at the first turn marker, if any.
                    for marker in stop_markers:
                        if marker in x:
                            x = x[:x.index(marker)].strip()
                    x = x.strip()
                    a = [[y[0], convert_to_markdown(y[1])] for y in history] + [[text, convert_to_markdown(x)]]
                    b = history + [[text, x]]
                    yield a, b, "Generating..."
                if shared_state.interrupted:
                    shared_state.recover()
                    # No try/except around the yield: a bare except here would
                    # swallow GeneratorExit and keep the generator running.
                    yield a, b, "Stop: Success"
                    return
    finally:
        # Runs on normal completion, on "Stop" and when the generator is closed.
        del input_ids
        gc.collect()
        torch.cuda.empty_cache()

    yield a, b, "Generate: Success"


def reset_chat():
    """Start a new chat by resetting the input textbox.

    Returns:
        Whatever ``reset_textbox()`` returns. Forwarding it (instead of
        dropping it and returning ``None``) lets this function be used
        directly as a Gradio handler with
        ``outputs=[user_input, status_display]``, the same outputs
        ``reset_textbox`` is wired to elsewhere in this file.
    """
    # Disabled HuggingChat variant: open and switch to a new conversation.
    #id_new = chatbot.new_conversation()
    #chatbot.change_conversation(id_new)
    return reset_textbox()

# When the 'Stop' button is clicked: show a message about it and clear the input box
def cancel_outputing():
    """Handle a click on the stop button.

    Calls ``reset_textbox()`` and returns the text for the status display.
    """
    # The helper's return value is discarded; only the status text goes back.
    reset_textbox()
    status_message = "Stop Done"
    return status_message
    
##########################################################
# Use the translation AI
def translate():
    """Placeholder for the translation feature; returns a fixed notice."""
    placeholder_notice = "Kommt noch!"
    return placeholder_notice
    
# Code-generation AI
def coding():
    """Placeholder for the code-generation feature; returns a fixed notice."""
    placeholder_notice = "Kommt noch!"
    return placeholder_notice

#######################################################################
# User interface built with Gradio

# Read the custom stylesheet that ships next to this script.
# NOTE(review): customCSS is not referenced again in the visible code (gr.Blocks
# below is created without a css= argument) — confirm whether it should be passed.
with open("custom.css", "r", encoding="utf-8") as f:
    customCSS = f.read()
    
# Build the whole UI. Components appear in the order they are created here.
with gr.Blocks(theme=small_and_beautiful_theme) as demo:
    # Per-session state: chat history and the question currently being answered.
    history = gr.State([])
    user_question = gr.State("")
    gr.Markdown("KIs am LI - wähle aus, was du bzgl. KI-Bots ausprobieren möchtest!")
    with gr.Tabs():
        # Tab 1: the chat itself
        with gr.TabItem("LI-Chat"):
            with gr.Row():
                gr.HTML(title)
                status_display = gr.Markdown("Erfolg", elem_id="status_display")
            gr.Markdown(description_top)
            with gr.Row(scale=1).style(equal_height=True):
                # Left column: chat window, input line with buttons, "new chat" button
                with gr.Column(scale=5):
                    with gr.Row(scale=1):
                        chatbotGr = gr.Chatbot(elem_id="LI_chatbot").style(height="100%")
                    with gr.Row(scale=1):
                        with gr.Column(scale=12):
                            user_input = gr.Textbox(
                                show_label=False, placeholder="Gib deinen Text / Frage ein."
                            ).style(container=False)
                        with gr.Column(min_width=100, scale=1):
                            submitBtn = gr.Button("Absenden")
                        with gr.Column(min_width=100, scale=1):
                            cancelBtn = gr.Button("Stoppen")
                    with gr.Row(scale=1):
                        emptyBtn = gr.Button(
                            "🧹 Neuer Chat",
                        )
                # Right column: generation parameters (labelled as for testing only)
                with gr.Column():
                    with gr.Column(min_width=50, scale=1):
                        with gr.Tab(label="Nur zum Testen:"):
                            gr.Markdown("# Parameter")
                            top_p = gr.Slider(
                                minimum=-0,
                                maximum=1.0,
                                value=0.95,
                                step=0.05,
                                interactive=True,
                                label="Top-p",
                            )
                            temperature = gr.Slider(
                                minimum=0.1,
                                maximum=2.0,
                                value=1,
                                step=0.1,
                                interactive=True,
                                label="Temperature",
                            )
                            max_length_tokens = gr.Slider(
                                minimum=0,
                                maximum=512,
                                value=512,
                                step=8,
                                interactive=True,
                                label="Max Generation Tokens",
                            )
                            max_context_length_tokens = gr.Slider(
                                minimum=0,
                                maximum=4096,
                                value=2048,
                                step=128,
                                interactive=True,
                                label="Max History Tokens",
                            )
            gr.Markdown(description)

        # Tab 2: translations - placeholder only, no handler attached yet
        with gr.TabItem("Übersetzungen"):
            with gr.Row():
                    gr.Textbox(
                                show_label=False, placeholder="Ist noch in Arbeit..."
                            ).style(container=False)
        # Tab 3: code generation - placeholder only, no handler attached yet
        with gr.TabItem("Code-Generierungen"):
            with gr.Row():
                    gr.Textbox(
                                show_label=False, placeholder="Ist noch in Arbeit..."
                            ).style(container=False)
    
    # Keyword arguments for the generation step: predict() receives the stored
    # question plus the slider values and updates chat, history and status.
    predict_args = dict(
        fn=predict,
        inputs=[
            user_question,
            chatbotGr,
            history,
            top_p,
            temperature,   # variation of the answers - default 1.0
            max_length_tokens,
            max_context_length_tokens,
        ],
        outputs=[chatbotGr, history, status_display],
        show_progress=True,
    )
        
    # New chat: reset the input box and the status display
    reset_args = dict(
        #fn=reset_chat, inputs=[], outputs=[user_input, status_display]
        fn=reset_textbox, inputs=[], outputs=[user_input, status_display]
    )
            
    # Chatbot: first step of a submit, handled by transfer_input with the
    # textbox content as input and question state, textbox and button as outputs
    transfer_input_args = dict(
        fn=transfer_input, inputs=[user_input], outputs=[user_question, user_input, submitBtn], show_progress=True
    )
        
    # Listeners for submitting via Return in the textbox or via the submit button:
    # run transfer_input first, then predict
    predict_event1 = user_input.submit(**transfer_input_args).then(**predict_args)
    predict_event2 = submitBtn.click(**transfer_input_args).then(**predict_args)
        
    # Listeners for "new chat": reset_state updates chat, history and status,
    # and a second click handler resets the textbox
    emptyBtn.click(
        reset_state,
        outputs=[chatbotGr, history, status_display],
        show_progress=True,
    )
    emptyBtn.click(**reset_args)

    # Stop button: update the status line and cancel both predict events
    # (original note: the computation/output can be continued afterwards)
    cancelBtn.click(cancel_outputing, [], [status_display], cancels=[predict_event1,predict_event2]) 
    #cancelBtn.click(lambda: None, None, chatbotGr, queue=False)

demo.title = "LI Chat"
# Alternative launch with a public share link:
#demo.queue(concurrency_count=1).launch(share=True) 
# Enable the request queue with concurrency_count=1 and start the app in debug mode.
demo.queue(concurrency_count=1).launch(debug=True)