# -*- coding: utf-8 -*-
"""SimpleChatBot_OpenSourceModel_WithUI.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1q7EXhcR6gncrcwySFbN7u9fOIwTc4LtD

## ***Note:*** You will NOT be charged for this exercise. Everything is open source!

### This notebook presents how to make a simple conversational chatbot using an open-source language model that we will download from the Hugging Face Hub
"""

"""### Install the Python packages. They are necessary to make the program work"""

# The package list below is inferred from the imports in this notebook; pin versions as needed.
!pip install -q transformers accelerate bitsandbytes langchain gradio

"""### Import the necessary libraries"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

"""### Select and download the model from Hugging Face

#### The Hugging Face Hub contains a lot of pre-trained AI models related to computer vision, NLP, etc. For our task, we need a text-generation model. Follow the steps below to choose and download one:

1. Go to this link -> https://huggingface.co/models?pipeline_tag=text-generation&sort=trending
2. For this example, we will be using Mistral 7B Instruct v0.2 [https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2]
"""

model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# Load the model in 4-bit so it fits on a single Colab GPU.
# device_map='auto' lets accelerate place the weights, so no manual .to("cuda") is needed.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
    quantization_config=quantization_config,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

"""### Set up a text-generation pipeline"""

from transformers import pipeline

pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    pad_token_id=tokenizer.eos_token_id,
    do_sample=True,  # required for temperature/top_k/top_p to take effect
    temperature=0.3,
    top_k=50,
    top_p=0.95,
    max_new_tokens=3072,
    repetition_penalty=1.2,
)

"""### Create an llm object"""

llm = HuggingFacePipeline(pipeline=pipe)

"""### Create a simple prompt template using the LangChain framework"""

template = """[INST] You are a question-answering bot.
You always respond with a funny twist, and keep your answers short.
Now answer this question: {question}.
To keep you more stateful, you also get help from the previous chat history: {chat_history}[/INST]
"""

prompt = PromptTemplate(template=template, input_variables=["question", "chat_history"])

"""### Create an llm chain"""

llm_chain = LLMChain(prompt=prompt, llm=llm)

"""### Try invoking the LLM with a simple chain"""

def ask_me_chat_completions(query, chat_history, llm_chain):
    response = llm_chain.run({"question": query, "chat_history": chat_history})
    return response
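"""#### A quick smoke test (optional)

A minimal example call, assuming the model loaded successfully; the question string is just an illustration, and the empty list stands in for "no prior chat history".
"""

print(ask_me_chat_completions("What is the capital of France?", [], llm_chain))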
"""### Create a UI for a more interactive conversation!

#### **Exercise**

1. Can you make the UI look better?
2. Can you include more customizations (for example, setting up something like the OpenAI playground)? Check out this link to see how it looks -> https://platform.openai.com/playground?mode=chat
3. Host your application by creating a Space on the Hugging Face Hub
"""

import gradio as gr

# Initialize chat history
chat_history = []

def chat_interface(query):
    global chat_history
    if query:  # Ensure the query is not empty
        response = ask_me_chat_completions(query, chat_history, llm_chain)
        formatted_response = f"Q: {query}\nA: {response}"  # Format the response
        chat_history.append(formatted_response)  # Append the formatted exchange to the history
        return formatted_response  # Return only the latest Q&A for display
    return ""  # Return an empty string if the query is empty

# Create the Gradio interface
with gr.Blocks() as demo:
    with gr.Tab("Chat"):
        query_input = gr.Textbox(
            label="Ask your questions here",
            placeholder="Type your question and press submit...",
        )
        submit_button = gr.Button("Submit")
        response_output = gr.Textbox(
            label="Response",
            interactive=False,
            lines=6,
            value="",
            placeholder="Your answer will appear here...",
        )
        submit_button.click(chat_interface, inputs=query_input, outputs=response_output)

# Launch the Gradio app
demo.launch()
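"""#### Optional: bound the chat history

`chat_history` above grows without limit, so long sessions can eventually overflow the model's context window. Below is a minimal sketch of one way to cap it; the window of 5 exchanges is an arbitrary assumption, and `chat_interface_trimmed` is a hypothetical drop-in replacement for `chat_interface` in the click handler above.
"""

MAX_TURNS = 5  # assumed window size; tune for your model's context length

def chat_interface_trimmed(query):
    global chat_history
    if query:
        # Feed only the most recent exchanges into the prompt.
        recent_history = chat_history[-MAX_TURNS:]
        response = ask_me_chat_completions(query, recent_history, llm_chain)
        chat_history.append(f"Q: {query}\nA: {response}")
        return chat_history[-1]
    return ""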