hugo1234 committed
Commit f6d5624
1 Parent(s): e64ca11

Update app.py

Files changed (1)
  1. app.py +200 -625
app.py CHANGED
@@ -1,645 +1,220 @@
1
- import os
2
- os.system('pip install bitsandbytes')
3
- os.system('pip install -q datasets loralib sentencepiece accelerate')
4
- # os.system('pip install -q git+https://github.com/zphang/transformers@c3dc391')
5
- # os.system('pip install -q git+https://github.com/huggingface/transformers')
6
- os.system('pip install -q git+https://github.com/mbehm/transformers')
7
- os.system('pip install -q git+https://github.com/huggingface/peft.git')
8
- # os.system('pip install gradio')
9
- # os.system('pip install torch')
10
- # os.system('pip install peft')
11
- # os.system('pip install transformers')
12
- os.system('pip install tenacity')
13
- os.system('pip install scipy')
14
- # os.system('pip install sentencepiece')
15
 
16
- import re
17
- import yaml
18
- import gc
19
- import copy
20
- import time
21
- from tenacity import RetryError
22
- from tenacity import retry, stop_after_attempt, wait_fixed
23
  import gradio as gr
24
- # import torch
25
- from peft import PeftModel
26
- from transformers import (
27
- LLaMATokenizer,
28
- LlamaForCausalLM,
29
- GenerationConfig,
30
- AutoModelForCausalLM,
31
- AutoModelForSeq2SeqLM,
32
- AutoTokenizer,
33
- LogitsProcessorList,
34
- MinNewTokensLengthLogitsProcessor,
35
- TemperatureLogitsWarper,
36
- TopPLogitsWarper,
37
- MinLengthLogitsProcessor
38
- )
39
-
40
- # assert torch.cuda.is_available(), "Change the runtime type to GPU"
41
-
42
- # constants
43
- num_of_characters_to_keep = 1000
44
-
45
- # regex
46
- html_tag_pattern = re.compile(r"<.*?>")
47
- multi_line_pattern = re.compile(r"\n+")
48
- multi_space_pattern = re.compile(r"( )")
49
- multi_br_tag_pattern = re.compile(re.compile(r'<br>\s*(<br>\s*)*'))
50
-
51
- # repl is short for replacement
52
- repl_linebreak = "\n"
53
- repl_empty_str = ""
54
-
55
- TITLE = "Galileo"
56
-
57
- ABSTRACT = """
58
- Stambecco is an Italian instruction-following model based on the [LLaMA](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/) model. It comes in two sizes: 7B and 13B parameters. It is trained on an Italian version of the [GPT-4-LLM](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM) dataset, a dataset of `GPT-4`-generated instruction-following data.
59
- This demo is intended to show and evaluate the conversational capabilities of the model.
60
- For more information, please visit [the project's website](https://github.com/mchl-labs/stambecco).
61
- NOTE: Inputs that are too long are not allowed. Please keep the context < 500 and the instruction < 150 characters.
62
- """
63
-
64
- BOTTOM_LINE = """
65
- By default, this demo runs in streaming mode, but you can also run it with dynamic batch generation.
66
- Stambecco is built on the same concept as the Stanford Alpaca project, but LoRA lets us train and run inference for the 7B version on smaller GPUs such as an RTX 4090. We can also build very small checkpoints on top of the base models thanks to the [🤗 transformers](https://huggingface.co/docs/transformers/index), [🤗 peft](https://github.com/huggingface/peft), and [bitsandbytes](https://github.com/TimDettmers/bitsandbytes/tree/main) libraries.
67
- This demo currently runs the 8-bit 7B version of the model.
68
- """
69
-
70
- DEFAULT_EXAMPLES = {
71
- "Typical Questions": [
72
- {
73
- "title": "Parlami di Giulio Cesare.",
74
- "examples": [
75
- ["1", "Scrivi un articolo su Giulio Cesare"],
76
- ["2", "Davvero?"],
77
- ["3", "Quanto era ricco Giulio Cesare?"],
78
- ["4", "Chi è stato il suo successore?"],
79
- ]
80
- },
81
- {
82
- "title": "Parigi",
83
- "examples": [
84
- ["1", "Scrivi un tema sulla città di Parigi"],
85
- ["2", "Fai un elenco di 5 posti da visitare assolutamente"],
86
- ["3", "Quali eventi importanti della Storia sono avvenuti a Parigi?"],
87
- ["4", "Quale è il periodo migliore per visitare Parigi?"],
88
- ]
89
- },
90
- {
91
- "title": "Scrivi un programma in Python che stampi i primi 10 numeri di Fibonacci",
92
- "examples": [
93
- ["1", "Scrivi un programma in Python che stampi i primi 10 numeri di Fibonacci"],
94
- ["2", "Potresti spiegarmi come funziona il codice?"],
95
- ["3", "Cos'è la ricorsione?"],
96
- ]
97
- }
98
- ],
99
- }
100
-
101
- SPECIAL_STRS = {
102
- "continue": "continua",
103
- "summarize": "Di cosa abbiamo discusso finora? Descrivi nella user's view."
104
- }
105
-
106
- PARENT_BLOCK_CSS = """
107
- #col_container {
108
- width: 95%;
109
- margin-left: auto;
110
- margin-right: auto;
111
- }
112
- #chatbot {
113
- height: 500px;
114
- overflow: auto;
115
- }
116
- """
117
-
118
- def load_model(
119
- base="decapoda-research/llama-7b-hf",
120
- finetuned="mchl-labs/stambecco-7b-plus",
121
- ):
122
- tokenizer = LLaMATokenizer.from_pretrained(base)
123
- tokenizer.pad_token_id = 0
124
- tokenizer.padding_side = "left"
125
-
126
- model = LlamaForCausalLM.from_pretrained(
127
- base,
128
- load_in_8bit=True,
129
- device_map="from_pretrained",
130
- # load_in_8bit_fp32_cpu_offload=True
131
- )
132
- # model = PeftModel.from_pretrained(model, finetuned, device_map={'': 0})
133
-
134
- model = PeftModel.from_pretrained(model, finetuned)
135
- return model, tokenizer
136
-
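
Note: `device_map="from_pretrained"` above is not one of the device-map values the mainline `transformers` loader documents (e.g. `"auto"` or an explicit index map), and the `LLaMATokenizer` spelling comes from the patched fork installed at the top of the file. A minimal sketch of the same 8-bit-base-plus-LoRA loading against mainline `transformers`/`peft`, assuming `device_map="auto"` and the same checkpoint names:

import torch
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer

def load_model_sketch(base="decapoda-research/llama-7b-hf",
                      finetuned="mchl-labs/stambecco-7b-plus"):
    # Left padding so batched generation lines up on the last token.
    tokenizer = LlamaTokenizer.from_pretrained(base)
    tokenizer.pad_token_id = 0
    tokenizer.padding_side = "left"
    # Load the quantized base model and let accelerate place the layers.
    model = LlamaForCausalLM.from_pretrained(
        base,
        load_in_8bit=True,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    # Attach the LoRA adapter on top of the 8-bit base model.
    model = PeftModel.from_pretrained(model, finetuned)
    model.eval()
    return model, tokenizer
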
137
- def get_generation_config(path):
138
- with open(path, 'rb') as f:
139
- generation_config = yaml.safe_load(f.read())
140
-
141
- return GenerationConfig(**generation_config["generation_config"])
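
The `generation_config_default.yaml` file referenced here is not part of this commit; the loader only requires a top-level `generation_config` mapping whose keys are valid `GenerationConfig` fields. A hypothetical example of such a file, inlined as a string (the field values are assumptions):

import yaml
from transformers import GenerationConfig

EXAMPLE_YAML = """
generation_config:
  temperature: 0.9
  top_p: 0.75
  num_beams: 1
"""

config = GenerationConfig(**yaml.safe_load(EXAMPLE_YAML)["generation_config"])
print(config.temperature, config.top_p)  # 0.9 0.75
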
142
-
143
- def generate_prompt(prompt, histories, ctx=None, partial=False):
144
- convs = f"""Di seguito è riportata una cronologia delle istruzioni che descrivono le tasks, abbinate a un input che fornisce ulteriore contesto. Scrivi una risposta che completi adeguatamente la richiesta ricordando la cronologia della conversazione.
145
-
146
- """
147
-
148
- if ctx is not None:
149
- convs = f"""### Input: {ctx}
150
- """
151
-
152
- sub_convs = ""
153
- start_idx = 0
154
-
155
- for idx, history in enumerate(histories):
156
- history_prompt = history[0]
157
- history_response = history[1]
158
- if history_response == "✅ Riepilogo della conversazione effettuato e impostato come contesto" or history_prompt == SPECIAL_STRS["summarize"]:
159
- start_idx = idx
160
-
161
- # drop the previous conversations if user has summarized
162
- for history in histories[start_idx if start_idx == 0 else start_idx+1:]:
163
- history_prompt = history[0]
164
- history_response = history[1]
165
-
166
- history_response = history_response.replace("<br>", "\n")
167
- history_response = re.sub(
168
- html_tag_pattern, repl_empty_str, history_response
169
- )
170
-
171
- sub_convs = sub_convs + f"""### Istruzione: {history_prompt}
172
- ### Risposta: {history_response}
173
- """
174
-
175
- sub_convs = sub_convs + f"""### Istruzione: {prompt}
176
- ### Risposta:"""
177
-
178
- convs = convs + sub_convs
179
- return sub_convs if partial else convs, len(sub_convs)
180
-
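
To make the prompt layout concrete, a short usage sketch of `generate_prompt` with a made-up chat history (the history strings are illustrative only):

history = [
    ["Scrivi un articolo su Giulio Cesare", "Giulio Cesare fu un generale e statista romano..."],
]
prompt, conv_length = generate_prompt("Davvero?", history, ctx=None)
print(prompt)
# Roughly:
#   <preamble describing the conversation history>
#   ### Istruzione: Scrivi un articolo su Giulio Cesare
#   ### Risposta: Giulio Cesare fu un generale e statista romano...
#   ### Istruzione: Davvero?
#   ### Risposta:
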
181
- def common_post_process(original_str):
182
- original_str = re.sub(
183
- multi_line_pattern, repl_linebreak, original_str
184
- )
185
- return original_str
186
-
187
- def post_process_stream(bot_response):
188
- # sometimes model spits out text containing
189
- # "### Risposta:" and "### Istruzione:" -> in this case, we want to stop generating
190
- if "### Risposta:" in bot_response or "### Input:" in bot_response:
191
- bot_response = bot_response.replace("### Risposta:", '').replace("### Input:", '').strip()
192
- return bot_response, True
193
-
194
- return common_post_process(bot_response), False
195
-
196
- def post_process_batch(bot_response):
197
- bot_response = bot_response.split("### Risposta:")[-1].strip()
198
- return common_post_process(bot_response)
199
-
200
- def post_processes_batch(bot_responses):
201
- return [post_process_batch(r) for r in bot_responses]
202
-
203
- def get_output_batch(
204
- model, tokenizer, prompts, generation_config
205
- ):
206
- if len(prompts) == 1:
207
- encoding = tokenizer(prompts, return_tensors="pt")
208
- input_ids = encoding["input_ids"].cuda()
209
- generated_id = model.generate(
210
- input_ids=input_ids,
211
- generation_config=generation_config,
212
- max_new_tokens=256
213
- )
214
-
215
- decoded = tokenizer.batch_decode(generated_id)
216
- del input_ids, generated_id
217
- torch.cuda.empty_cache()
218
- return decoded
219
  else:
220
- encodings = tokenizer(prompts, padding=True, return_tensors="pt").to('cuda')
221
- generated_ids = model.generate(
222
- **encodings,
223
- generation_config=generation_config,
224
- max_new_tokens=256
225
- )
226
-
227
- decoded = tokenizer.batch_decode(generated_ids)
228
- del encodings, generated_ids
229
- torch.cuda.empty_cache()
230
- return decoded
231
-
232
-
233
- # StreamModel is borrowed from basaran project
234
- # please find more info about it -> https://github.com/hyperonym/basaran
235
- class StreamModel:
236
- """StreamModel wraps around a language model to provide stream decoding."""
237
-
238
- def __init__(self, model, tokenizer):
239
- super().__init__()
240
- self.model = model
241
- self.tokenizer = tokenizer
242
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
243
-
244
- self.processor = LogitsProcessorList()
245
- self.processor.append(TemperatureLogitsWarper(0.9))
246
- self.processor.append(TopPLogitsWarper(0.75))
247
-
248
-
249
- def __call__(
250
- self,
251
- prompt,
252
- min_tokens=0,
253
- max_tokens=16,
254
- temperature=1.0,
255
- top_p=1.0,
256
- n=1,
257
- logprobs=0,
258
- ):
259
- """Create a completion stream for the provided prompt."""
260
- input_ids = self.tokenize(prompt)
261
- logprobs = max(logprobs, 0)
262
-
263
- # bigger than 1
264
- chunk_size = 2
265
- chunk_count = 0
266
-
267
- # Generate completion tokens.
268
- final_tokens = torch.empty(0)
269
 
270
- for tokens in self.generate(
271
- input_ids[None, :].repeat(n, 1),
272
- logprobs=logprobs,
273
- min_new_tokens=min_tokens,
274
- max_new_tokens=max_tokens,
275
- temperature=temperature,
276
- top_p=top_p,
277
- ):
278
- if chunk_count < chunk_size:
279
- chunk_count = chunk_count + 1
280
-
281
- final_tokens = torch.cat((final_tokens, tokens.to("cpu")))
282
-
283
- if chunk_count == chunk_size-1:
284
- chunk_count = 0
285
- yield self.tokenizer.decode(final_tokens, skip_special_tokens=True)
286
-
287
- if chunk_count > 0:
288
- yield self.tokenizer.decode(final_tokens, skip_special_tokens=True)
289
-
290
- del final_tokens, input_ids
291
- if self.device == "cuda":
292
- torch.cuda.empty_cache()
293
-
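
Each value yielded by `__call__` is the full completion decoded so far (the prompt itself is not included), so a caller only needs to keep the latest string. A usage sketch, assuming the `model`/`tokenizer` pair loaded above:

demo_stream = StreamModel(model, tokenizer)
latest = ""
for partial_text in demo_stream("### Istruzione: Ciao!\n### Risposta:",
                                max_tokens=64, temperature=0.7, top_p=0.9):
    latest = partial_text  # progressively longer decoded text
print(latest)
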
294
- def _infer(self, model_fn, **kwargs):
295
- with torch.inference_mode():
296
- return model_fn(**kwargs)
297
-
298
- def tokenize(self, text):
299
- """Tokenize a string into a tensor of token IDs."""
300
- batch = self.tokenizer.encode(text, return_tensors="pt")
301
- return batch[0].to(self.device)
302
-
303
- def generate(self, input_ids, logprobs=0, **kwargs):
304
- """Generate a stream of predicted tokens using the language model."""
305
-
306
- # Store the original batch size and input length.
307
- batch_size = input_ids.shape[0]
308
- input_length = input_ids.shape[-1]
309
-
310
- # Separate model arguments from generation config.
311
- config = self.model.generation_config
312
- config = copy.deepcopy(config)
313
- kwargs = config.update(**kwargs)
314
- kwargs["output_attentions"] = False
315
- kwargs["output_hidden_states"] = False
316
- kwargs["use_cache"] = True
317
-
318
- # Collect special token IDs.
319
- pad_token_id = config.pad_token_id
320
- bos_token_id = config.bos_token_id
321
- eos_token_id = config.eos_token_id
322
- if isinstance(eos_token_id, int):
323
- eos_token_id = [eos_token_id]
324
- if pad_token_id is None and eos_token_id is not None:
325
- pad_token_id = eos_token_id[0]
326
-
327
- # Generate from eos if no input is specified.
328
- if input_length == 0:
329
- input_ids = input_ids.new_ones((batch_size, 1)).long()
330
- if eos_token_id is not None:
331
- input_ids = input_ids * eos_token_id[0]
332
- input_length = 1
333
-
334
- # Keep track of which sequences are already finished.
335
- unfinished = input_ids.new_ones(batch_size)
336
-
337
- # Start auto-regressive generation.
338
- while True:
339
- inputs = self.model.prepare_inputs_for_generation(
340
- input_ids, **kwargs
341
- ) # noqa: E501
342
-
343
- outputs = self._infer(
344
- self.model,
345
- **inputs,
346
- # return_dict=True,
347
- output_attentions=False,
348
- output_hidden_states=False,
349
- )
350
-
351
- # Pre-process the probability distribution of the next tokens.
352
- logits = outputs.logits[:, -1, :]
353
- with torch.inference_mode():
354
- logits = self.processor(input_ids, logits)
355
- probs = torch.nn.functional.softmax(logits, dim=-1)
356
-
357
- # Select deterministic or stochastic decoding strategy.
358
- if (config.top_p is not None and config.top_p <= 0) or (
359
- config.temperature is not None and config.temperature <= 0
360
- ):
361
- tokens = torch.argmax(probs, dim=-1)[:, None]
362
- else:
363
- tokens = torch.multinomial(probs, num_samples=1)
364
-
365
- tokens = tokens.squeeze(1)
366
-
367
- # Finished sequences should have their next token be a padding.
368
- if pad_token_id is not None:
369
- tokens = tokens * unfinished + pad_token_id * (1 - unfinished)
370
-
371
- # Append selected tokens to the inputs.
372
- input_ids = torch.cat([input_ids, tokens[:, None]], dim=-1)
373
-
374
- # Mark sequences with eos tokens as finished.
375
- if eos_token_id is not None:
376
- not_eos = sum(tokens != i for i in eos_token_id)
377
- unfinished = unfinished.mul(not_eos.long())
378
-
379
- # Set status to -1 if exceeded the max length.
380
- status = unfinished.clone()
381
- if input_ids.shape[-1] - input_length >= config.max_new_tokens:
382
- status = 0 - status
383
-
384
- # Yield predictions and status.
385
- yield tokens
386
-
387
- # Stop when finished or exceeded the max length.
388
- if status.max() <= 0:
389
- break
390
-
391
- generation_config = get_generation_config(
392
- "./generation_config_default.yaml"
393
- )
394
-
395
- model, tokenizer = load_model(
396
- # base="decapoda-research/llama-13b-hf",
397
- # finetuned="mchl-labs/stambecco-13b-plus",
398
- )
399
 
400
- stream_model = StreamModel(model, tokenizer)
401
 
402
- def chat_stream(
403
- context,
404
- instruction,
405
- state_chatbot,
406
- ):
407
- if len(context) > 1000 or len(instruction) > 300:
408
- raise gr.Error("Context or prompt is too long!")
409
-
410
- bot_summarized_response = ''
411
- # user input should be appropriately formatted (don't be confused by the function name)
412
- instruction_display = instruction
413
- instruction_prompt, conv_length = generate_prompt(instruction, state_chatbot, context)
414
 
415
- if conv_length > num_of_characters_to_keep:
416
- instruction_prompt = generate_prompt(SPECIAL_STRS["summarize"], state_chatbot, context, partial=True)[0]
417
-
418
- state_chatbot = state_chatbot + [
419
- (
420
- None,
421
- "![](https://s2.gifyu.com/images/icons8-loading-circle.gif) Conversazione troppo lunga, sto riassumendo..."
422
- )
423
- ]
424
- yield (state_chatbot, state_chatbot, context)
425
-
426
- bot_summarized_response = get_output_batch(
427
- model, tokenizer, [instruction_prompt], generation_config
428
- )[0]
429
- bot_summarized_response = bot_summarized_response.split("### Risposta:")[-1].strip()
430
-
431
- state_chatbot[-1] = (
432
- None,
433
- "✅ Riepilogo della conversazione effettuato e impostato come contesto"
434
- )
435
- print(f"bot_summarized_response: {bot_summarized_response}")
436
- yield (state_chatbot, state_chatbot, f"{context}. {bot_summarized_response}".strip())
437
-
438
- instruction_prompt = generate_prompt(instruction, state_chatbot, f"{context} {bot_summarized_response}")[0]
439
-
440
- bot_response = stream_model(
441
- instruction_prompt,
442
- max_tokens=256,
443
- temperature=1,
444
- top_p=0.9
445
- )
446
 
447
- instruction_display = None if instruction_display == SPECIAL_STRS["continue"] else instruction_display
448
- state_chatbot = state_chatbot + [(instruction_display, None)]
449
- yield (state_chatbot, state_chatbot, f"{context}. {bot_summarized_response}".strip())
 
450
 
451
- prev_index = 0
452
- agg_tokens = ""
453
- cutoff_idx = 0
454
- for tokens in bot_response:
455
- tokens = tokens.strip()
456
- cur_token = tokens[prev_index:]
457
-
458
- if "#" in cur_token and agg_tokens == "":
459
- cutoff_idx = tokens.find("#")
460
- agg_tokens = tokens[cutoff_idx:]
461
-
462
- if agg_tokens != "":
463
- if len(agg_tokens) < len("### Istruzione:") :
464
- agg_tokens = agg_tokens + cur_token
465
- elif len(agg_tokens) >= len("### Istruzione:"):
466
- if tokens.find("### Istruzione:") > -1:
467
- processed_response, _ = post_process_stream(tokens[:tokens.find("### Istruzione:")].strip())
468
-
469
- state_chatbot[-1] = (
470
- instruction_display,
471
- processed_response
472
- )
473
- yield (state_chatbot, state_chatbot, f"{context} {bot_summarized_response}".strip())
474
- break
475
- else:
476
- agg_tokens = ""
477
- cutoff_idx = 0
478
-
479
- if agg_tokens == "":
480
- processed_response, to_exit = post_process_stream(tokens)
481
- state_chatbot[-1] = (instruction_display, processed_response)
482
- yield (state_chatbot, state_chatbot, f"{context} {bot_summarized_response}".strip())
483
 
484
- if to_exit:
485
- break
486
 
487
- prev_index = len(tokens)
488
-
489
- yield (
490
- state_chatbot,
491
- state_chatbot,
492
- f"{context} {bot_summarized_response}".strip()
493
- )
494
-
495
-
496
- def chat_batch(
497
- contexts,
498
- instructions,
499
- state_chatbots,
500
- ):
501
- state_results = []
502
- ctx_results = []
503
-
504
- instruct_prompts = [
505
- generate_prompt(instruct, histories, ctx)
506
- for ctx, instruct, histories in zip(contexts, instructions, state_chatbots)
507
- ]
508
-
509
- bot_responses = get_output_batch(
510
- model, tokenizer, instruct_prompts, generation_config
511
- )
512
- bot_responses = post_processes_batch(bot_responses)
513
-
514
- for ctx, instruction, bot_response, state_chatbot in zip(contexts, instructions, bot_responses, state_chatbots):
515
- new_state_chatbot = state_chatbot + [('' if instruction == SPECIAL_STRS["continue"] else instruction, bot_response)]
516
- ctx_results.append(gr.Textbox.update(value=bot_response) if instruction == SPECIAL_STRS["summarize"] else ctx)
517
- state_results.append(new_state_chatbot)
518
-
519
- return (state_results, state_results, ctx_results)
520
-
521
- def reset_textbox():
522
- return gr.Textbox.update(value='')
523
-
524
- def reset_everything(
525
- context_txtbox,
526
- instruction_txtbox,
527
- state_chatbot):
528
-
529
- state_chatbot = []
530
 
531
- return (
532
- state_chatbot,
533
- state_chatbot,
534
- gr.Textbox.update(value=''),
535
- gr.Textbox.update(value=''),
536
- )
537
-
538
- with gr.Blocks(css=PARENT_BLOCK_CSS) as demo:
539
- state_chatbot = gr.State([])
540
-
541
- with gr.Column(elem_id='col_container'):
542
- gr.Markdown(f"## {TITLE}\n\n\n{ABSTRACT}")
543
-
544
- with gr.Accordion("Context Setting", open=False):
545
- context_txtbox = gr.Textbox(placeholder="Surrounding information to AI", label="Enter Context")
546
- hidden_txtbox = gr.Textbox(placeholder="", label="Order", visible=False)
547
-
548
- chatbot = gr.Chatbot(elem_id='chatbot', label="Stambecco")
549
- instruction_txtbox = gr.Textbox(placeholder="What do you want to say to AI?", label="Instruction")
550
- with gr.Row():
551
- cancel_btn = gr.Button(value="Cancel")
552
- reset_btn = gr.Button(value="Reset")
553
-
554
- with gr.Accordion("Helper Buttons", open=False):
555
- gr.Markdown(f"`Continue` lets the AI complete its previous, incomplete answer. `Summarize` lets the AI summarize the conversation so far.")
556
- continue_txtbox = gr.Textbox(value=SPECIAL_STRS["continue"], visible=False)
557
- summrize_txtbox = gr.Textbox(value=SPECIAL_STRS["summarize"], visible=False)
558
-
559
- continue_btn = gr.Button(value="Continue")
560
- summarize_btn = gr.Button(value="Summarize")
561
-
562
- gr.Markdown("#### Examples")
563
- for _, (category, examples) in enumerate(DEFAULT_EXAMPLES.items()):
564
- with gr.Accordion(category, open=False):
565
- if category == "Identity":
566
- for item in examples:
567
- with gr.Accordion(item["title"], open=False):
568
- gr.Examples(
569
- examples=item["examples"],
570
- inputs=[
571
- hidden_txtbox, context_txtbox, instruction_txtbox
572
- ],
573
- label=None
574
  )
575
- else:
576
- for item in examples:
577
- with gr.Accordion(item["title"], open=False):
578
- gr.Examples(
579
- examples=item["examples"],
580
- inputs=[
581
- hidden_txtbox, instruction_txtbox
582
- ],
583
- label=None
584
  )
585
-
586
- gr.Markdown(f"{BOTTOM_LINE}")
587
-
588
-
589
- send_event = instruction_txtbox.submit(
590
- chat_stream,
591
- [context_txtbox, instruction_txtbox, state_chatbot],
592
- [state_chatbot, chatbot, context_txtbox],
593
- )
594
- reset_event = instruction_txtbox.submit(
595
- reset_textbox,
596
- [],
597
- [instruction_txtbox],
598
- )
599
 
600
- continue_event = continue_btn.click(
601
- chat_stream,
602
- [context_txtbox, continue_txtbox, state_chatbot],
603
- [state_chatbot, chatbot, context_txtbox],
604
- )
605
- reset_continue_event = continue_btn.click(
606
- reset_textbox,
607
- [],
608
- [instruction_txtbox],
609
  )
610
-
611
- summarize_event = summarize_btn.click(
612
- chat_stream,
613
- [context_txtbox, summrize_txtbox, state_chatbot],
614
- [state_chatbot, chatbot, context_txtbox],
615
  )
616
- summarize_reset_event = summarize_btn.click(
617
- reset_textbox,
618
- [],
619
- [instruction_txtbox],
620
  )
621
-
622
- cancel_btn.click(
623
- None, None, None,
624
- cancels=[
625
- send_event, continue_event, summarize_event
626
- ]
627
  )
 
628
 
629
- reset_btn.click(
630
- reset_everything,
631
- [context_txtbox, instruction_txtbox, state_chatbot],
632
- [state_chatbot, chatbot, context_txtbox, instruction_txtbox],
633
- cancels=[
634
- send_event, continue_event, summarize_event
635
- ]
636
- )
637
-
638
- demo.queue(
639
- concurrency_count=1,
640
- max_size=100,
641
- ).launch(
642
- max_threads=5,
643
- server_name="0.0.0.0",
644
- share=True
645
- )
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
 
4
  import gradio as gr
5
+ #from transformers import pipeline
6
+ import torch
7
+ from utils import *
8
+ from presets import *
9
+
10
+ #antwort=""
11
+ ######################################################################
12
+ # Models and tokenizers
13
+
14
+ # Use Hugging Chat
15
+ # Create a chatbot connection
16
+ #chatbot = hugchat.ChatBot(cookie_path="cookies.json")
17
+
18
+ # Alternatively, with any other model:
19
+ #base_model = "project-baize/baize-v2-7b"
20
+ base_model = "EleutherAI/gpt-neo-1.3B"
21
+ tokenizer,model,device = load_tokenizer_and_model(base_model)
22
+
23
+
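
`load_tokenizer_and_model` (like the other helpers used below) comes from the local `utils` module, which is not included in this commit. A rough stand-in for what such a helper typically does, written against plain `transformers` (the body below is an assumption, not the Space's actual utils.py):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_tokenizer_and_model_sketch(base_model: str):
    # Pair an AutoTokenizer/AutoModelForCausalLM with the best available device.
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    ).to(device)
    model.eval()
    return tokenizer, model, device
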
24
+ ########################################################################
25
+ # Use the chat AI to generate text...
26
+ def predict(text,
27
+ chatbotGr,
28
+ history,
29
+ top_p,
30
+ temperature,
31
+ max_length_tokens,
32
+ max_context_length_tokens,):
33
+ if text=="":
34
+ yield chatbotGr,history,"Empty context."
35
+ return
36
+ try:
37
+ model
38
+ except:
39
+ yield [[text,"No Model Found"]],[],"No Model Found"
40
+ return
41
+
42
+ inputs = generate_prompt_with_history(text,history,tokenizer,max_length=max_context_length_tokens)
43
+ if inputs is None:
44
+ yield chatbotGr,history,"Input too long."
45
+ return
46
  else:
47
+ prompt,inputs=inputs
48
+ begin_length = len(prompt)
49
 
50
+ input_ids = inputs["input_ids"][:,-max_context_length_tokens:].to(device)
51
+ torch.cuda.empty_cache()
52
+
53
+ # torch.no_grad() means that no gradients are computed for the tensors involved during backpropagation
54
+ # the network itself should not change here (backprop is not needed), since these are inference prompts
55
+ with torch.no_grad():
56
+ # all previous prompts are stored in history as pairs, labelled 'Human' and 'AI' - these labels therefore also serve as the stop words that mark the next entry
57
+ for x in greedy_search(input_ids,model,tokenizer,stop_words=["[|Human|]", "[|AI|]"],max_length=max_length_tokens,temperature=temperature,top_p=top_p):
58
+ if is_stop_word_or_prefix(x,["[|Human|]", "[|AI|]"]) is False:
59
+ if "[|Human|]" in x:
60
+ x = x[:x.index("[|Human|]")].strip()
61
+ if "[|AI|]" in x:
62
+ x = x[:x.index("[|AI|]")].strip()
63
+ x = x.strip()
64
+ a, b= [[y[0],convert_to_markdown(y[1])] for y in history]+[[text, convert_to_markdown(x)]],history + [[text,x]]
65
+ yield a, b, "Generating..."
66
+ if shared_state.interrupted:
67
+ shared_state.recover()
68
+ try:
69
+ yield a, b, "Stop: Success"
70
+ return
71
+ except:
72
+ pass
73
+ del input_ids
74
+ gc.collect()
75
+ torch.cuda.empty_cache()
76
+
77
+ try:
78
+ yield a,b,"Generate: Success"
79
+ except:
80
+ pass
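
The loop above treats `history` as a list of `[user_text, bot_text]` pairs and uses `[|Human|]`/`[|AI|]` as stop markers; `greedy_search` and `is_stop_word_or_prefix` also live in the missing `utils` module. A hypothetical sketch of the stop-word check under that assumption (not the actual utils implementation):

def is_stop_word_or_prefix_sketch(text, stop_words):
    # True if the text ends with a stop word, or with a prefix of one,
    # so streaming can hold back output that might turn into a marker.
    for stop_word in stop_words:
        if text.endswith(stop_word):
            return True
        for i in range(1, len(stop_word)):
            if text.endswith(stop_word[:i]):
                return True
    return False

print(is_stop_word_or_prefix_sketch("...ciao [|Hu", ["[|Human|]", "[|AI|]"]))  # True
print(is_stop_word_or_prefix_sketch("...ciao.", ["[|Human|]", "[|AI|]"]))      # False
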
81
 
 
82
 
83
+ def reset_chat():
84
+ #id_new = chatbot.new_conversation()
85
+ #chatbot.change_conversation(id_new)
86
+ reset_textbox()
87
 
88
 
89
+ ##########################################################
90
+ # Use the translation AI
91
+ def translate():
92
+ return "Kommt noch!"
93
 
94
+ # Code-generation AI
95
+ def coding():
96
+ return "Kommt noch!"
97
 
98
+ #######################################################################
99
+ # UI layout with Gradio
100
 
101
+ with open("custom.css", "r", encoding="utf-8") as f:
102
+ customCSS = f.read()
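
`customCSS` is read here, but the `gr.Blocks` call below only passes `theme=small_and_beautiful_theme`. If the stylesheet is meant to be applied as well, it would go through the `css` argument; a sketch under that assumption, reusing the `gr`, `small_and_beautiful_theme`, and `customCSS` names from this file:

# Sketch: apply both the preset theme and the loaded stylesheet.
with gr.Blocks(theme=small_and_beautiful_theme, css=customCSS) as demo_sketch:
    gr.Markdown("Platzhalter")
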
103
 
104
+ with gr.Blocks(theme=small_and_beautiful_theme) as demo:
105
+ history = gr.State([])
106
+ user_question = gr.State("")
107
+ gr.Markdown("KIs am LI - wähle aus, was du bzgl. KI-Bots ausprobieren möchtest!")
108
+ with gr.Tabs():
109
+ with gr.TabItem("LI-Chat"):
110
+ with gr.Row():
111
+ gr.HTML(title)
112
+ status_display = gr.Markdown("Erfolg", elem_id="status_display")
113
+ gr.Markdown(description_top)
114
+ with gr.Row(scale=1).style(equal_height=True):
115
+ with gr.Column(scale=5):
116
+ with gr.Row(scale=1):
117
+ chatbotGr = gr.Chatbot(elem_id="LI_chatbot").style(height="100%")
118
+ with gr.Row(scale=1):
119
+ with gr.Column(scale=12):
120
+ user_input = gr.Textbox(
121
+ show_label=False, placeholder="Gib deinen Text / Frage ein."
122
+ ).style(container=False)
123
+ with gr.Column(min_width=100, scale=1):
124
+ submitBtn = gr.Button("Absenden")
125
+ with gr.Column(min_width=100, scale=1):
126
+ cancelBtn = gr.Button("Stoppen")
127
+ with gr.Row(scale=1):
128
+ emptyBtn = gr.Button(
129
+ "🧹 Neuer Chat",
130
+ )
131
+ with gr.Column():
132
+ with gr.Column(min_width=50, scale=1):
133
+ with gr.Tab(label="Parameter zum Model"):
134
+ gr.Markdown("# Parameters")
135
+ top_p = gr.Slider(
136
+ minimum=-0,
137
+ maximum=1.0,
138
+ value=0.95,
139
+ step=0.05,
140
+ interactive=True,
141
+ label="Top-p",
142
  )
143
+ temperature = gr.Slider(
144
+ minimum=0.1,
145
+ maximum=2.0,
146
+ value=1,
147
+ step=0.1,
148
+ interactive=True,
149
+ label="Temperature",
150
  )
151
+ max_length_tokens = gr.Slider(
152
+ minimum=0,
153
+ maximum=512,
154
+ value=512,
155
+ step=8,
156
+ interactive=True,
157
+ label="Max Generation Tokens",
158
+ )
159
+ max_context_length_tokens = gr.Slider(
160
+ minimum=0,
161
+ maximum=4096,
162
+ value=2048,
163
+ step=128,
164
+ interactive=True,
165
+ label="Max History Tokens",
166
+ )
167
+ gr.Markdown(description)
168
+
169
+ with gr.TabItem("Übersetzungen"):
170
+ with gr.Row():
171
+ gr.Textbox(
172
+ show_label=False, placeholder="Ist noch in Arbeit..."
173
+ ).style(container=False)
174
+ with gr.TabItem("Code-Generierungen"):
175
+ with gr.Row():
176
+ gr.Textbox(
177
+ show_label=False, placeholder="Ist noch in Arbeit..."
178
+ ).style(container=False)
179
 
180
+ predict_args = dict(
181
+ fn=predict,
182
+ inputs=[
183
+ user_question,
184
+ chatbotGr,
185
+ history,
186
+ top_p,
187
+ temperature,
188
+ max_length_tokens,
189
+ max_context_length_tokens,
190
+ ],
191
+ outputs=[chatbotGr, history, status_display],
192
+ show_progress=True,
193
  )
194
+
195
+ # new chat
196
+ reset_args = dict(
197
+ #fn=reset_chat, inputs=[], outputs=[user_input, status_display]
198
+ fn=reset_textbox, inputs=[], outputs=[user_input, status_display]
199
  )
200
+
201
+ # Chatbot
202
+ transfer_input_args = dict(
203
+ fn=transfer_input, inputs=[user_input], outputs=[user_question, user_input, submitBtn], show_progress=True
204
  )
205
+
206
+ # Listeners for clicking the start button or pressing Return
207
+ predict_event1 = user_input.submit(**transfer_input_args).then(**predict_args)
208
+ predict_event2 = submitBtn.click(**transfer_input_args).then(**predict_args)
209
+
210
+ # Listener for reset...
211
+ emptyBtn.click(
212
+ reset_state,
213
+ outputs=[chatbotGr, history, status_display],
214
+ show_progress=True,
215
  )
216
+ emptyBtn.click(**reset_args)
217
 
218
+ demo.title = "LI Chat"
219
+ #demo.queue(concurrency_count=1).launch(share=True)
220
+ demo.queue(concurrency_count=1).launch(debug=True)
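
`reset_state`, `reset_textbox`, and `transfer_input` are likewise imported from the missing `utils` module. Plausible sketches of what they return for the outputs they are wired to above (hypothetical, inferred only from the listener signatures):

import gradio as gr

def reset_textbox_sketch():
    # outputs=[user_input, status_display]
    return gr.update(value=""), "Reset"

def reset_state_sketch():
    # outputs=[chatbotGr, history, status_display]
    return [], [], "Neuer Chat gestartet"

def transfer_input_sketch(user_input_text):
    # outputs=[user_question, user_input, submitBtn]
    # Move the typed text into state, clear the textbox, disable the button while generating.
    return user_input_text, gr.update(value=""), gr.update(interactive=False)
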