mkumar87AI committed
Commit • f4d8de3
1 Parent(s): 7c629c6
Upload app.py

app.py ADDED
@@ -0,0 +1,125 @@
# -*- coding: utf-8 -*-
"""SimpleChatBot_OpenSourceModel.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1q7EXhcR6gncrcwySFbN7u9fOIwTc4LtD

## ***Note:*** You will NOT be charged for this exercise. Everything is open source.

### This notebook presents how to make a simple conversational chatbot using an open-source language model that we will download from the Hugging Face Hub.

### Fix the UTF-8 encoding
"""

import locale
locale.getpreferredencoding = lambda: "UTF-8"

"""### Install the Python packages. They are necessary to make the program work."""

!pip install -qq -U langchain transformers sentence-transformers bitsandbytes accelerate

!pip install git+https://github.com/huggingface/transformers@v4.31-release

!pip install transformers -U
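
# Note: the "!" shell commands above only run inside a notebook. If this file
# is executed as a plain script (e.g. a Space's app.py), the same packages
# belong in requirements.txt instead.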

"""### Import the necessary libraries"""

from langchain.llms.huggingface_pipeline import HuggingFacePipeline
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.embeddings import (OpenAIEmbeddings, HuggingFaceEmbeddings)
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

"""### Select and download the model from Hugging Face

#### The Hugging Face Hub contains a lot of pre-trained AI models related to computer vision, NLP, etc. For our task, we need a text-generation model. Follow the steps below to choose and download one.

1. Go to this link -> https://huggingface.co/models?pipeline_tag=text-generation&sort=trending

2. For this example, we will be using Mistral 7B Instruct v0.2 [https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2]
"""

torch.set_default_device("cuda")

model_id = "mistralai/Mistral-7B-Instruct-v0.2"
model = AutoModelForCausalLM.from_pretrained(model_id,
                                             device_map='auto',
                                             torch_dtype="auto",
                                             quantization_config=BitsAndBytesConfig(load_in_4bit=True),
                                             trust_remote_code=True,
                                             low_cpu_mem_usage=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
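
# Rough memory math: ~7B parameters at 4 bits each is about 3.5 GB of weights
# on the GPU (plus activations and overhead), versus roughly 14 GB in float16.
# This is why 4-bit loading lets the model fit on a single consumer GPU.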

"""### Setup the pipeline using tokenizer and model"""

from transformers import pipeline

pipe = pipeline(
    task = "text-generation",
    model = model,
    tokenizer = tokenizer,
    pad_token_id = tokenizer.eos_token_id,
    do_sample = True,  # required for temperature/top_k/top_p to take effect
    temperature = 0.3,
    top_k = 50,
    top_p = 0.95,
    max_new_tokens = 3072,
    repetition_penalty = 1.2
)
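
# A quick smoke test of the raw pipeline, using Mistral's instruction format
# (illustrative prompt, not part of the original notebook):
#   out = pipe("<s>[INST] Say hello in one sentence. [/INST]")
#   print(out[0]["generated_text"])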

"""### Create an llm object"""

llm = HuggingFacePipeline(pipeline = pipe)

"""### Create a simple prompt template using the LangChain framework"""

template = """
<s>[INST] You are a question-answering bot.
You always respond with a funny twist, and keep your
answers short. Now answer this question: {question}.
To keep you more stateful, you also get help from the previous
chat history: {chat_history}[/INST]
"""
prompt = PromptTemplate(template=template, input_variables=["question", "chat_history"])
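
# To inspect the exact string the model receives, the template can be rendered
# directly (hypothetical values):
#   print(prompt.format(question="What is 2+2?", chat_history=[]))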

"""### Create an llm chain"""

llm_chain = LLMChain(prompt=prompt, llm=llm)

"""### Try invoking the LLM, with a simple chain"""

def ask_me_chat_completions(query, chat_history, llm_chain):
    response = llm_chain.run({"question": query, "chat_history": chat_history})
    return response
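
# Example one-off call with no prior history (illustrative question):
#   print(ask_me_chat_completions("Why is the sky blue?", [], llm_chain))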

"""### Question and Answer segment

#### **Activity:** Try the following things

1. Search the internet for the context length of the model.
   Check when the model reaches its context limit and stops responding.
2. Can you do anything in the prompt to fit more conversation into the context length?
3. Can you programmatically increase the context window?
4. Can you programmatically make the memory management better? (A minimal sketch of one approach follows the chat loop below.)
"""

query = None
chat_history = []
while query != "q":
    query = input("Ask your questions here, press q to quit: ")
    if query != "q":
        response = ask_me_chat_completions(query, chat_history, llm_chain)
        print(f'Your query returned the following response: {response}')
        # Keep both sides of the turn; appending only the response would drop
        # the user's questions from the chat history the prompt relies on.
        chat_history.append(f"Human: {query}\nAI: {response}")
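
# A minimal sketch for activity 4, assuming the list-of-strings history used
# above: cap the history by token count so the oldest turns are dropped before
# the prompt outgrows the model's context window. The 2048-token budget is an
# illustrative number, not a tuned value.
def trim_history(chat_history, tokenizer, max_history_tokens=2048):
    """Keep only the most recent turns that fit within the token budget."""
    kept, used = [], 0
    for turn in reversed(chat_history):          # walk from newest to oldest
        n_tokens = len(tokenizer.encode(turn))
        if used + n_tokens > max_history_tokens:
            break                                # budget exhausted; stop keeping turns
        kept.append(turn)
        used += n_tokens
    return list(reversed(kept))                  # restore chronological order

# Usage would be, e.g., chat_history = trim_history(chat_history, tokenizer)
# just before calling ask_me_chat_completions inside the loop.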