### # Elo based comparison of models # https://chat.lmsys.org/?leaderboard ### ## # visual libraries gradio , could be streamlit as well or cl ## import gradio as gr ## # Libraries # Langchain - https://python.langchain.com/docs/get_started/introduction.html # Used for simplifiing calls, task ## import langchain import transformers # https://huggingface.co/spaces/joyson072/LLm-Langchain/blob/main/app.py from langchain.llms import HuggingFaceHub # https://cobusgreyling.medium.com/langchain-creating-large-language-model-llm-applications-via-huggingface-192423883a74 from langchain.chains import ConversationChain from langchain.chains.conversation.memory import ConversationBufferMemory #conversation = ConversationChain( # llm=llm, # verbose=True, # memory=ConversationBufferMemory() #) #conversation.predict(input="Hi there!") # for the chain and prompt from langchain.prompts import PromptTemplate from langchain.chains import LLMChain, SimpleSequentialChain ################### llm = HuggingFaceHub( repo_id="google/flan-ul2", # repo_id="google/flan-t5-small", model_kwargs={"temperature":0.1, "max_new_tokens":250}) # Chain 1: Generating a rephrased version of the user's question template = """{question}\n\n""" prompt_template = PromptTemplate(input_variables=["question"], template=template) question_chain = LLMChain(llm=llm, prompt=prompt_template) # Chain 2: Generating assumptions made in the statement template = """Here is a statement: {statement} Make a bullet point list of the assumptions you made when producing the above statement.\n\n""" prompt_template = PromptTemplate(input_variables=["statement"], template=template) assumptions_chain = LLMChain(llm=llm, prompt=prompt_template) assumptions_chain_seq = SimpleSequentialChain( chains=[question_chain, assumptions_chain], verbose=True ) # Chain 3: Fact checking the assumptions template = """Here is a bullet point list of assertions: {assertions} For each assertion, determine whether it is true or false. If it is false, explain why.\n\n""" prompt_template = PromptTemplate(input_variables=["assertions"], template=template) fact_checker_chain = LLMChain(llm=llm, prompt=prompt_template) fact_checker_chain_seq = SimpleSequentialChain( chains=[question_chain, assumptions_chain, fact_checker_chain], verbose=True ) # Final Chain: Generating the final answer to the user's question based on the facts and assumptions template = """In light of the above facts, how would you answer the question '{}'""".format( "What is the capitol of the usa?" # user_question ) template = """{facts}\n""" + template prompt_template = PromptTemplate(input_variables=["facts"], template=template) answer_chain = LLMChain(llm=llm, prompt=prompt_template) overall_chain = SimpleSequentialChain( chains=[question_chain, assumptions_chain, fact_checker_chain, answer_chain], verbose=True, ) #print(overall_chain.run("What is the capitol of the usa?")) ################## #import model class and tokenizer from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration ### # Definition of different purspose prompts # https://huggingface.co/spaces/Chris4K/rlhf-arena/edit/main/app.py #### def prompt_human_instruct(system_msg, history): return system_msg.strip() + "\n" + \ "\n".join(["\n".join(["###Human: "+item[0], "###Assistant: "+item[1]]) for item in history]) def prompt_instruct(system_msg, history): return system_msg.strip() + "\n" + \ "\n".join(["\n".join(["### Instruction: "+item[0], "### Response: "+item[1]]) for item in history]) def prompt_chat(system_msg, history): return system_msg.strip() + "\n" + \ "\n".join(["\n".join(["USER: "+item[0], "ASSISTANT: "+item[1]]) for item in history]) def prompt_roleplay(system_msg, history): return "<|system|>" + system_msg.strip() + "\n" + \ "\n".join(["\n".join(["<|user|>"+item[0], "<|model|>"+item[1]]) for item in history]) #### ## Sentinent models # https://huggingface.co/spaces/CK42/sentiment-model-comparison # 1, 4 seem best for german #### model_id_1 = "nlptown/bert-base-multilingual-uncased-sentiment" model_id_2 = "microsoft/deberta-xlarge-mnli" model_id_3 = "distilbert-base-uncased-finetuned-sst-2-english" model_id_4 = "lordtt13/emo-mobilebert" model_id_5 = "juliensimon/reviews-sentiment-analysis" model_id_6 = "sbcBI/sentiment_analysis_model" model_id_7 = "oliverguhr/german-sentiment-bert" # https://colab.research.google.com/drive/1hrS6_g14EcOD4ezwSGlGX2zxJegX5uNX#scrollTo=NUwUR9U7qkld #llm_hf_sentiment = HuggingFaceHub( # repo_id= model_id_7, # model_kwargs={"temperature":0.9 } #) from transformers import pipeline # ## Possible pipeline #"['audio-classification', 'automatic-speech-recognition', 'conversational', 'depth-estimation', 'document-question-answering', #'feature-extraction', 'fill-mask', 'image-classification', 'image-segmentation', 'image-to-text', 'mask-generation', 'ner', #'object-detection', 'question-answering', 'sentiment-analysis', 'summarization', 'table-question-answering', 'text-classification', #'text-generation', 'text2text-generation', 'token-classification', 'translation', 'video-classification', 'visual-question-answering', #'vqa', 'zero-shot-audio-classification', 'zero-shot-classification', 'zero-shot-image-classification', 'zero-shot-object-detection', #'translation_XX_to_YY']" ## sentiment_pipe = pipeline("sentiment-analysis", model=model_id_7) #pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es") def pipeline_predict_sentiment(text): sentiment_result = sentiment_pipe(text) print(sentiment_result) return sentiment_result chat_pipe = pipeline("conversational") def pipeline_predict_chat(text): sentiment_result = chat_pipe(text) print(sentiment_result) return sentiment_result #['huggingface', 'models', 'spaces'] #sentiment = gr.load(model_id_7, src="huggingface") #def sentiment (message): # sentiment_label = sentiment.predict(message) # print ( sentiment_label) # return sentiment_label #sentiment_prompt = PromptTemplate( # input_variables=["text_input"], # template="Extract the key facts out of this text. Don't include opinions. Give each fact a number and keep them short sentences. :\n\n {text_input}" #) #def sentiment ( message): # sentiment_chain = LLMChain(llm=llm_hf_sentiment, prompt=sentiment_prompt) # facts = sentiment_chain.run(message) # print(facts) # return facts #### ## Chat models # https://huggingface.co/spaces/CK42/sentiment-model-comparison # 1 seem best for testing #### chat_model_facebook_blenderbot_400M_distill = "facebook/blenderbot-400M-distill" chat_model_HenryJJ_vincua_13b = "HenryJJ/vincua-13b" text = "Why did the chicken cross the road?" #output_question_1 = llm_hf(text) #print(output_question_1) ### ## FACT EXTRACTION ### # https://colab.research.google.com/drive/1hrS6_g14EcOD4ezwSGlGX2zxJegX5uNX#scrollTo=NUwUR9U7qkld llm_factextract = HuggingFaceHub( # repo_id="google/flan-ul2", repo_id="google/flan-t5-small", model_kwargs={"temperature":0.1, "max_new_tokens":250}) fact_extraction_prompt = PromptTemplate( input_variables=["text_input"], template="Extract the key facts out of this text. Don't include opinions. Give each fact a number and keep them short sentences. :\n\n {text_input}" ) def factextraction (message): fact_extraction_chain = LLMChain(llm=llm_factextract, prompt=fact_extraction_prompt) facts = fact_extraction_chain.run(message) print(facts) return facts #### ## models # 1 seem best for testing #### #download and setup the model and tokenizer model_name_chat = 'facebook/blenderbot-400M-distill' tokenizer = BlenderbotTokenizer.from_pretrained(model_name_chat) model_chat = BlenderbotForConditionalGeneration.from_pretrained(model_name_chat) def func (message): inputs = tokenizer(message, return_tensors="pt") result = model_chat.generate(**inputs) print(result) return tokenizer.decode(result[0]) title="Conversation Bota" desc="Some way ... " app = gr.Interface( fn=func, title="Conversation Bota", inputs=["text", "checkbox", gr.Slider(0, 100)], outputs=["text", "number"], ) ##### ###### ###### examples = [ ["Erzähl mit eine Geschichte!",50,2,3,1,"Deutsch"], ["Welche Blumen sollte man jemandem zum Valentinstag schenken?",50,1,0,1,"Deutsch"], ["Please write a step by step recipe to make bolognese pasta!",50,2,3,2,"Englisch"] ] tDeEn = pipeline(model="Helsinki-NLP/opus-mt-de-en") tEnDe = pipeline(model="Helsinki-NLP/opus-mt-en-de") bot = pipeline(model="google/flan-t5-large") def solve(text,max_length,length_penalty,no_repeat_ngram_size,num_beams,language): if(language=="Deutsch"): text=tDeEn(text)[0]["translation_text"] out=bot(text,max_length=max_length, length_penalty=length_penalty, no_repeat_ngram_size=no_repeat_ngram_size, num_beams=num_beams, early_stopping=True)[0]["generated_text"] if(language=="Deutsch"): out=tEnDe(out)[0]["translation_text"] return out task = gr.Interface( fn=solve, inputs=[ gr.Textbox(lines=5,max_lines=6,label="Frage"), gr.Slider(minimum=1.0,maximum=200.0,value=50.0,step=1,interactive=True,label="max_length"), gr.Slider(minimum=1.0,maximum=20.0,value=1.0,step=1,interactive=True,label="length_penalty"), gr.Slider(minimum=0.0,maximum=5.0,value=3.0,step=1,interactive=True,label="no_repeat_ngram_size"), gr.Slider(minimum=1.0,maximum=20.0,value=1.0,step=1,interactive=True,label="num_beams"), gr.Dropdown(["Deutsch", "Englisch"],value="Deutsch"), ], outputs="text", title=title, description=desc, examples=examples ) #### from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, TextIteratorStreamer from threading import Thread model_id = "philschmid/instruct-igel-001" model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True) tokenizer = AutoTokenizer.from_pretrained(model_id) prompt_template = f"### Anweisung:\n{{input}}\n\n### Antwort:" def generate(instruction, temperature=1.0, max_new_tokens=256, top_p=0.9, length_penalty=1.0): formatted_instruction = prompt_template.format(input=instruction) # make sure temperature top_p and length_penalty are floats temperature = float(temperature) top_p = float(top_p) length_penalty = float(length_penalty) # COMMENT IN FOR NON STREAMING # generation_config = GenerationConfig( # do_sample=True, # top_p=top_p, # top_k=0, # temperature=temperature, # max_new_tokens=max_new_tokens, # early_stopping=True, # length_penalty=length_penalty, # eos_token_id=tokenizer.eos_token_id, # pad_token_id=tokenizer.pad_token_id, # ) # input_ids = tokenizer( # formatted_instruction, return_tensors="pt", truncation=True, max_length=2048 # ).input_ids.cuda() # with torch.inference_mode(), torch.autocast("cuda"): # outputs = model.generate(input_ids=input_ids, generation_config=generation_config)[0] # output = tokenizer.decode(outputs.detach().cpu().numpy(), skip_special_tokens=True) # return output.split("### Antwort:\n")[1] # STREAMING BASED ON git+https://github.com/gante/transformers.git@streamer_iterator # streaming streamer = TextIteratorStreamer(tokenizer) model_inputs = tokenizer(formatted_instruction, return_tensors="pt", truncation=True, max_length=2048) # move to gpu model_inputs = {k: v.to(device) for k, v in model_inputs.items()} generate_kwargs = dict( top_p=top_p, top_k=0, temperature=temperature, do_sample=True, max_new_tokens=max_new_tokens, early_stopping=True, length_penalty=length_penalty, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id, ) t = Thread(target=model.generate, kwargs={**dict(model_inputs, streamer=streamer), **generate_kwargs}) t.start() output = "" hidden_output = "" for new_text in streamer: # skip streaming until new text is available if len(hidden_output) <= len(formatted_instruction): hidden_output += new_text continue # replace eos token if tokenizer.eos_token in new_text: new_text = new_text.replace(tokenizer.eos_token, "") output += new_text yield output # if HF_TOKEN: # save_inputs_and_outputs(formatted_instruction, output, generate_kwargs) return output #app.launch() #################### #app_sentiment = gr.Interface(fn=predict , inputs="textbox", outputs="textbox", title="Conversation Bot") # create a public link, set `share=True` in `launch() #app_sentiment.launch() #################### ### ### ### classifier = pipeline("zero-shot-classification") text = "This is a tutorial about Hugging Face." candidate_labels = ["informieren", "kaufen", "beschweren", "verkaufen"] def topic_sale_inform (text): res = classifier(text, candidate_labels) print (res) return res #### #conversation = Conversation("Welcome") def callChains(current_message,max_length,length_penalty,no_repeat_ngram_size,num_beams,language): #final_answer = generate(current_message, 1.0, 256, 0.9, 1.0) sentiment_analysis_result = pipeline_predict_sentiment(current_message) topic_sale_inform_result = topic_sale_inform(current_message) #conversation.append_response("The Big lebowski.") #conversation.add_user_input("Is it good?") final_answer = func(current_message) #final_answer = solve(current_message,max_length,length_penalty,no_repeat_ngram_size,num_beams,language) return final_answer, sentiment_analysis_result, topic_sale_inform_result ### current_message_inputfield = gr.Textbox(lines=5,max_lines=6,label="Gib hier eine Nachricht ein") final_answer_inputfield = gr.Textbox(label="Antwort ", placeholder="Hier kommt die Antwort hin ...") sentiment_analysis_result_inputfield = gr.Textbox(label="Sentiment ") topic_sale_inform_result_inputfield = gr.Textbox(label="Thema ") chat_bot = gr.Interface(fn=callChains , inputs=[ current_message_inputfield, gr.Slider(minimum=1.0,maximum=200.0,value=50.0,step=1,interactive=True,label="max_length"), gr.Slider(minimum=1.0,maximum=20.0,value=1.0,step=1,interactive=True,label="length_penalty"), gr.Slider(minimum=0.0,maximum=5.0,value=3.0,step=1,interactive=True,label="no_repeat_ngram_size"), gr.Slider(minimum=1.0,maximum=20.0,value=1.0,step=1,interactive=True,label="num_beams"), gr.Dropdown(["Deutsch", "Englisch"],value="Deutsch"), ], outputs=[final_answer_inputfield,sentiment_analysis_result_inputfield,topic_sale_inform_result_inputfield], title="Conversation Bot with extra") # create a public link, set `share=True` in `launch() chat_bot.launch() #################### app_facts = gr.Interface(fn=factextraction , inputs="textbox", outputs="textbox", title="Conversation Bots") # create a public link, set `share=True` in `launch() #app_facts.launch() ####################