ChatBotLI2Klein

Paused

App Files Files Community

ChatBotLI2Klein / app.py

alexkueck

Update app.py

3bdafbe 11 months ago

raw

history blame contribute delete

No virus

10.3 kB

	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	import gradio as gr
	#from transformers import pipeline
	import torch
	from utils import *
	from presets import *
	from huggingface_hub import login
	from transformers import LlamaForCausalLM, LlamaTokenizer

	#antwort=""
	######################################################################
	#Modelle und Tokenizer

	#Hugging Chat nutzen
	# Create a chatbot connection
	#chatbot = hugchat.ChatBot(cookie_path="cookies.json")

	#Alternativ mit beliebigen Modellen:
	#base_model = "project-baize/baize-v2-7b" #load_8bit = False (in load_tokenizer_and_model)
	#base_model = "meta-llama/Llama-2-13b"
	#base_model="codellama/CodeLlama-13b-Instruct-hf"
	#########latest ###########
	#base_model = "tiiuae/falcon-40b-instruct"
	#base_model = "MAGAer13/mPLUG-Owl" #load_8bit = False (in load_tokenizer_and_model)
	#base_model = "alexkueck/li-tis-tuned-2" #load_8bit = False (in load_tokenizer_and_model)
	#base_model = "TheBloke/airoboros-13B-HF" #load_8bit = False (in load_tokenizer_and_model)
	#base_model = "EleutherAI/gpt-neo-1.3B" #load_8bit = False (in load_tokenizer_and_model)
	#base_model = "TheBloke/airoboros-13B-HF" #load_8bit = True
	#base_model = "TheBloke/vicuna-13B-1.1-HF" #load_8bit = ?
	#following runs only on GPU upgrade
	#base_model = "TheBloke/airoboros-65B-gpt4-1.3-GPTQ" #model_basename = "airoboros-65b-gpt4-1.3-GPTQ-4bit--1g.act.order"
	#base_model = "lmsys/vicuna-13b-v1.3"
	#base_model = "gpt2-xl" # options: ['gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl']
	# Hugging Face model id of the checkpoint that is loaded at start-up.
	base_model = "mistralai/Mistral-7B-v0.1"


	####################################
	# Load tokenizer and model once at import time; `device` is the third value
	# returned by the loader and is used by predict() to place the input ids.
	# NOTE(review): the second positional argument is presumably `load_8bit`
	# (see the commented-out model alternatives above) - confirm against
	# load_tokenizer_and_model.
	tokenizer,model,device = load_tokenizer_and_model(base_model,False)

	################################
	#Alternativ: Model und Tokenizer für GPT2
	#tokenizer,model,device = load_tokenizer_and_model_gpt2(base_model,False)

	# Alternative: TheBloke GPT-3/GPT-4 style models - only with a GPU upgrade
	#tokenizer,model,device = load_tokenizer_and_model_bloke_gpt(base_model, "airoboros-65b-gpt4-1.3-GPTQ-4bit--1g.act.order")

	# Alternative: load model and tokenizer for Baize
	#tokenizer,model,device = load_tokenizer_and_model_Baize(base_model,False)

	########################################################################
	#Chat KI nutzen, um Text zu generieren...
	def predict(
	    text,
	    chatbotGr,
	    history,
	    top_p,
	    temperature,
	    max_length_tokens,
	    max_context_length_tokens,
	):
	    """Stream a model answer for ``text`` as a Gradio generator handler.

	    Every yielded item is a ``(chatbot_rows, history, status)`` triple that
	    matches the ``[chatbotGr, history, status_display]`` outputs.

	    Args:
	        text: The user's message. An empty value short-circuits with the
	            status ``"Empty context."``.
	        chatbotGr: Current chatbot rows; yielded back unchanged whenever no
	            new answer text is available.
	        history: List of ``[user_text, answer_text]`` pairs from earlier
	            turns.
	        top_p: Forwarded to ``greedy_search``.
	        temperature: Forwarded to ``greedy_search``.
	        max_length_tokens: Forwarded to ``greedy_search`` as ``max_length``.
	        max_context_length_tokens: Passed to the prompt builder as
	            ``max_length`` and used to keep only the last N input-id columns.

	    Yields:
	        ``(rows, history, status)`` where ``status`` is one of
	        ``"Empty context."``, ``"No Model Found"``, ``"Input too long."``,
	        ``"Generating..."``, ``"Stop: Success"`` or ``"Generate: Success"``.
	    """
	    # Local import: the file has no explicit top-level `import gc`.
	    import gc

	    if not text:
	        yield chatbotGr, history, "Empty context."
	        return
	    try:
	        model
	    except NameError:
	        # The module-level model was never created.
	        yield [[text, "No Model Found"]], [], "No Model Found"
	        return

	    inputs = generate_prompt_with_history(
	        text, history, tokenizer, max_length=max_context_length_tokens
	    )
	    if inputs is None:
	        yield chatbotGr, history, "Input too long."
	        return
	    _prompt, inputs = inputs

	    # Speaker markers that end an answer. Raw strings keep the exact same
	    # characters as before without relying on invalid escape sequences.
	    # NOTE(review): the backslashes look like markup-escaping residue -
	    # confirm the markers match what generate_prompt_with_history emits.
	    stop_markers = (r"[\|Human\|]", r"[\|AI\|]")

	    input_ids = inputs["input_ids"][:, -max_context_length_tokens:].to(device)
	    torch.cuda.empty_cache()

	    # Fallback for the case that no displayable text is ever produced, so
	    # the final yield never hits an unbound name.
	    a, b = chatbotGr, history
	    interrupted = False
	    try:
	        # Inference only: no gradients are needed because the network is
	        # not being trained here.
	        with torch.no_grad():
	            for x in greedy_search(
	                input_ids,
	                model,
	                tokenizer,
	                stop_words=list(stop_markers),
	                max_length=max_length_tokens,
	                temperature=temperature,
	                top_p=top_p,
	            ):
	                if is_stop_word_or_prefix(x, list(stop_markers)) is False:
	                    # Cut the text at the first speaker marker, if any.
	                    for marker in stop_markers:
	                        if marker in x:
	                            x = x[:x.index(marker)].strip()
	                    x = x.strip()
	                    a = [[y[0], convert_to_markdown(y[1])] for y in history] + [
	                        [text, convert_to_markdown(x)]
	                    ]
	                    b = history + [[text, x]]
	                    yield a, b, "Generating..."
	                if shared_state.interrupted:
	                    shared_state.recover()
	                    interrupted = True
	                    break
	    finally:
	        # Runs on normal completion, on interruption and when Gradio closes
	        # the generator, so the tensors are always released.
	        del input_ids
	        gc.collect()
	        torch.cuda.empty_cache()

	    yield a, b, ("Stop: Success" if interrupted else "Generate: Success")


	def reset_chat():
	    """Start a new chat by resetting the input textbox.

	    Returns:
	        Whatever ``reset_textbox()`` returns, so this function can be wired
	        to the same ``[user_input, status_display]`` outputs that
	        ``reset_textbox`` itself is wired to. Previously the result was
	        dropped and ``None`` was returned.
	    """
	    # Disabled Hugging Chat variant, kept for reference:
	    #id_new = chatbot.new_conversation()
	    #chatbot.change_conversation(id_new)
	    return reset_textbox()

	#wenn 'Stop' Button geklickt, dann Message dazu und das Eingabe-Fenster leeren
	def cancel_outputing():
	    """Handle a click on the stop button.

	    Calls ``reset_textbox()`` (its return value is discarded) and returns
	    the status text that is shown to the user.
	    """
	    status_message = "Stop Done"
	    reset_textbox()
	    return status_message

	##########################################################
	#Übersetzungs Ki nutzen
	def translate():
	    """Placeholder for the translation feature; returns a fixed notice."""
	    placeholder_notice = "Kommt noch!"
	    return placeholder_notice

	#Programmcode KI
	def coding():
	    """Placeholder for the code-generation feature; returns a fixed notice."""
	    placeholder_notice = "Kommt noch!"
	    return placeholder_notice

	#######################################################################
	#Darstellung mit Gradio

	# Read the stylesheet that ships next to the app.
	# NOTE(review): customCSS is loaded but never handed to gr.Blocks (for
	# example via css=customCSS) - confirm whether that is intended.
	with open("custom.css", "r", encoding="utf-8") as f:
	    customCSS = f.read()

	# NOTE(review): the nesting below is reconstructed from the order of the
	# layout calls - verify it against the rendered page.
	with gr.Blocks(theme=small_and_beautiful_theme) as demo:
	    # Per-session state: the conversation so far and the pending question.
	    history = gr.State([])
	    user_question = gr.State("")
	    gr.Markdown("KIs am LI - wähle aus, was du bzgl. KI-Bots ausprobieren möchtest!")
	    with gr.Tabs():
	        with gr.TabItem("LI-Chat"):
	            with gr.Row():
	                gr.HTML(title)
	                status_display = gr.Markdown("Erfolg", elem_id="status_display")
	            gr.Markdown(description_top)
	            with gr.Row(scale=1).style(equal_height=True):
	                # Left: chat window, input line and buttons.
	                with gr.Column(scale=5):
	                    with gr.Row(scale=1):
	                        chatbotGr = gr.Chatbot(elem_id="LI_chatbot").style(height="100%")
	                    with gr.Row(scale=1):
	                        with gr.Column(scale=12):
	                            user_input = gr.Textbox(
	                                show_label=False, placeholder="Gib deinen Text / Frage ein."
	                            ).style(container=False)
	                        with gr.Column(min_width=100, scale=1):
	                            submitBtn = gr.Button("Absenden")
	                        with gr.Column(min_width=100, scale=1):
	                            cancelBtn = gr.Button("Stoppen")
	                    with gr.Row(scale=1):
	                        emptyBtn = gr.Button(
	                            "🧹 Neuer Chat",
	                        )
	                # Right: generation parameters.
	                with gr.Column():
	                    with gr.Column(min_width=50, scale=1):
	                        with gr.Tab(label="Nur zum Testen:"):
	                            gr.Markdown("# Parameter")
	                            top_p = gr.Slider(
	                                minimum=0,
	                                maximum=1.0,
	                                value=0.95,
	                                step=0.05,
	                                interactive=True,
	                                label="Top-p",
	                            )
	                            temperature = gr.Slider(
	                                minimum=0.1,
	                                maximum=2.0,
	                                value=1,
	                                step=0.1,
	                                interactive=True,
	                                label="Temperature",
	                            )
	                            max_length_tokens = gr.Slider(
	                                minimum=0,
	                                maximum=512,
	                                value=512,
	                                step=8,
	                                interactive=True,
	                                label="Max Generation Tokens",
	                            )
	                            max_context_length_tokens = gr.Slider(
	                                minimum=0,
	                                maximum=4096,
	                                value=2048,
	                                step=128,
	                                interactive=True,
	                                label="Max History Tokens",
	                            )
	            gr.Markdown(description)

	        # Placeholder tabs for features that are not implemented yet.
	        with gr.TabItem("Übersetzungen"):
	            with gr.Row():
	                gr.Textbox(
	                    show_label=False, placeholder="Ist noch in Arbeit..."
	                ).style(container=False)
	        with gr.TabItem("Code-Generierungen"):
	            with gr.Row():
	                gr.Textbox(
	                    show_label=False, placeholder="Ist noch in Arbeit..."
	                ).style(container=False)

	    # Keyword arguments for the generation step.
	    predict_args = dict(
	        fn=predict,
	        inputs=[
	            user_question,
	            chatbotGr,
	            history,
	            top_p,
	            temperature,  # variation of the answers
	            max_length_tokens,
	            max_context_length_tokens,
	        ],
	        outputs=[chatbotGr, history, status_display],
	        show_progress=True,
	    )

	    # Keyword arguments for "new chat": reset the textbox and the status.
	    reset_args = dict(
	        #fn=reset_chat, inputs=[], outputs=[user_input, status_display]
	        fn=reset_textbox, inputs=[], outputs=[user_input, status_display]
	    )

	    # Keyword arguments for moving the typed text into user_question.
	    transfer_input_args = dict(
	        fn=transfer_input, inputs=[user_input], outputs=[user_question, user_input, submitBtn], show_progress=True
	    )

	    # Start generation on Return or on a click on the send button. The
	    # argument dicts must be unpacked with **; passing a dict positionally
	    # would hand it to Gradio as the handler function.
	    predict_event1 = user_input.submit(**transfer_input_args).then(**predict_args)
	    predict_event2 = submitBtn.click(**transfer_input_args).then(**predict_args)

	    # "New chat": reset chatbot/history/status, then clear the input box.
	    emptyBtn.click(
	        reset_state,
	        outputs=[chatbotGr, history, status_display],
	        show_progress=True,
	    )
	    emptyBtn.click(**reset_args)

	    # Stop button: cancels both predict event chains and shows the status
	    # text returned by cancel_outputing.
	    cancelBtn.click(cancel_outputing, [], [status_display], cancels=[predict_event1, predict_event2])
	    #cancelBtn.click(lambda: None, None, chatbotGr, queue=False)

	demo.title = "LI Chat"
	#demo.queue(concurrency_count=1).launch(share=True)
	demo.queue(concurrency_count=1).launch(debug=True)