# -*- coding: utf-8 -*-
"""SimpleChatBot_OpenSourceModel_WithUI.ipynb

Automatically generated by Colaboratory.

Original file is located at
https://colab.research.google.com/drive/1q7EXhcR6gncrcwySFbN7u9fOIwTc4LtD

## ***Note:*** You will NOT be charged for this exercise. Everything is open source!

### This notebook shows how to build a simple conversational chatbot using an open-source language model that we will download from the Hugging Face Hub

### Fix the UTF-8 encoding
"""
"""### Install the python packages. They are need to execute necessary to make the program work""" | |
"""### Import the necessary libraries""" | |
from langchain.llms.huggingface_pipeline import HuggingFacePipeline | |
import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
from transformers import BitsAndBytesConfig | |
from langchain.chains import LLMChain | |
from langchain.prompts import PromptTemplate | |
from langchain.embeddings import (OpenAIEmbeddings, HuggingFaceEmbeddings) | |
from langchain.schema import StrOutputParser | |
from langchain.schema.runnable import RunnablePassthrough | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.vectorstores import Chroma | |
from langchain.document_loaders import PyPDFLoader | |
from langchain.callbacks.manager import CallbackManager | |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler | |
"""### Select and download the model from Hugging face | |
#### Hugging face hub contains a lot of pre-trained AI models related to computer vision, NLP, etc. For our task, we would need to use a text-generation model. Follow the steps below to choose and download a model | |
1. Go to this link -> https://huggingface.co/models?pipeline_tag=text-generation&sort=trending | |
2. For this example, we will be using the Mitsral 7B Instruct v0.2 [https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2] | |
""" | |
torch.set_default_device("cuda")

model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# Quantize the weights to 4 bits so the 7B model fits in a single Colab GPU;
# passing a BitsAndBytesConfig is the supported replacement for the deprecated
# load_in_4bit=True keyword argument
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
    quantization_config=quantization_config,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
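"""#### Optional sanity check: Mistral Instruct expects the `<s>[INST] ... [/INST]` prompt format that we hand-write in the template below. Recent transformers versions can render it from the tokenizer (a sketch; the output shown is indicative):"""

messages = [{"role": "user", "content": "Hello!"}]
print(tokenizer.apply_chat_template(messages, tokenize=False))
# Expected output: <s>[INST] Hello! [/INST]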
"""### Setup a text generation pipeline | |
""" | |
from transformers import pipeline | |
pipe = pipeline( | |
task = "text-generation", | |
model = model, | |
tokenizer = tokenizer, | |
pad_token_id = tokenizer.eos_token_id, | |
temperature = 0.3, | |
top_k = 50, | |
top_p = 0.95, | |
max_new_tokens=3072, | |
repetition_penalty = 1.2 | |
) | |
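"""#### Quick smoke test of the raw pipeline (the prompt is an arbitrary example). A `text-generation` pipeline returns a list of dicts with a `generated_text` key:"""

out = pipe("<s>[INST] Say hi in five words. [/INST]")
print(out[0]["generated_text"])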
"""### Create an llm object""" | |
llm = HuggingFacePipeline(pipeline = pipe) | |
"""### Create a simple prompt tempelate using Langchain framework""" | |
template = """ | |
"<s>[INST] You are a question and answering bot | |
You always respond with a funny twist, and keep your | |
answers short. Now answer this Question : {question}. | |
To keep you more stateful, you also get help with previous | |
chat history : {chat_history}[/INST] | |
""" | |
prompt = PromptTemplate(template=template, input_variables=["question", "chat_history"]) | |
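"""#### You can preview the rendered prompt before wiring it into a chain (the example values are arbitrary):"""

print(prompt.format(question="Why is the sky blue?", chat_history=""))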
"""### Create an llm chain""" | |
llm_chain = LLMChain(prompt=prompt, llm=llm) | |
"""### Try invoking the LLM, with a simple chain""" | |
def ask_me_chat_completions(query, chat_history, llm_chain): | |
response = llm_chain.run({"question":query,"chat_history":chat_history}) | |
return response | |
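"""#### Example invocation (the question is arbitrary and the output will vary from run to run):"""

history = []
answer = ask_me_chat_completions("What is the capital of France?", history, llm_chain)
print(answer)
history.append(f"Q: What is the capital of France?\nA: {answer}")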
"""### Create a UI for more interactive conversation! | |
#### **Excercise ** | |
1. Can you make the UI look better ? | |
2. Can you include more customizations (for example, setting up something like OpenAI playground ? Check out this link for how it looks like -> https://platform.openai.com/playground?mode=chat) | |
3. Host your application by creating space on HuggingFaceHub | |
""" | |
import gradio as gr

# Initialize chat history
chat_history = []

def chat_interface(query):
    global chat_history
    if query:  # Ensure the query is not empty
        response = ask_me_chat_completions(query, chat_history, llm_chain)
        formatted_response = f"Q: {query}\nA: {response}"  # Format the response
        chat_history.append(formatted_response)  # Append the formatted Q&A to the history
        return formatted_response  # Return only the latest Q&A for display
    return ""  # Return an empty string if the query is empty
# Create the Gradio interface
with gr.Blocks() as demo:
    with gr.Tab("Chat"):
        query_input = gr.Textbox(label="Ask your questions here", placeholder="Type your question and press submit...")
        submit_button = gr.Button("Submit")
        response_output = gr.Textbox(label="Response", interactive=False, lines=6, value="", placeholder="Your answer will appear here...")
        submit_button.click(chat_interface, inputs=query_input, outputs=response_output)

# Launch the Gradio app
demo.launch()
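# If the inline UI does not render in your environment, share=True asks Gradio for a
# temporary public link (a usage note, not part of the original notebook):
# demo.launch(share=True)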