# -*- coding: utf-8 -*-
"""SimpleChatBot_OpenSourceModel_WithUI.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1q7EXhcR6gncrcwySFbN7u9fOIwTc4LtD

##*** Note : *** You will be NOT be charged for this exercise. Everything is OpenSource!

### This notebook presents how to make a simple conversational chatbot using Open Source language model that we will download from hugging-face hub

### Fix the UTF-8 encoding
"""


"""### Install the python packages. They are need to execute necessary to make the program work"""

"""### Import the necessary libraries"""

from langchain.llms.huggingface_pipeline import HuggingFacePipeline
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
# The imports below (embeddings, output parser, text splitter, vector store,
# PDF loader, streaming callbacks) are not used in this notebook; they support
# RAG-style extensions and can be removed for the plain chatbot.
from langchain.embeddings import (OpenAIEmbeddings, HuggingFaceEmbeddings)
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

"""### Select and download the model from Hugging face

#### Hugging face hub contains a lot of pre-trained AI models related to computer vision, NLP, etc. For our task, we would need to use a text-generation model. Follow the steps below to choose and download a model

1. Go to this link -> https://huggingface.co/models?pipeline_tag=text-generation&sort=trending

2. For this example, we will be using the Mitsral 7B Instruct v0.2 [https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2]
"""

torch.set_default_device("cuda")

model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# Quantize the model to 4-bit so the 7B weights fit on a single Colab GPU.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(model_id,
                                             device_map='auto',
                                             torch_dtype="auto",
                                             quantization_config=quantization_config,
                                             trust_remote_code=True,
                                             low_cpu_mem_usage=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
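
# Optional sanity check (not in the original notebook): report how much memory
# the 4-bit model actually occupies. get_memory_footprint() returns bytes.
print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")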

"""### Setup a text generation pipeline

"""

from transformers import pipeline

pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    pad_token_id=tokenizer.eos_token_id,  # avoid the "no pad token set" warning
    do_sample=True,         # required for temperature/top_k/top_p to take effect
    temperature=0.3,        # low temperature keeps answers fairly focused
    top_k=50,
    top_p=0.95,
    max_new_tokens=3072,
    repetition_penalty=1.2,
)

"""### Create an llm object"""

llm = HuggingFacePipeline(pipeline = pipe)
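
# Quick smoke test of the wrapped pipeline (the prompt text is just an example;
# .invoke is the LangChain runnable interface available in recent versions):
print(llm.invoke("<s>[INST] Say hello in one short sentence. [/INST]"))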

"""### Create a simple prompt tempelate using Langchain framework"""

template = """
"<s>[INST] You are a question and answering bot
    You always respond with a funny twist, and keep your
    answers short. Now answer this Question : {question}.
    To keep you more stateful, you also get help with previous
    chat history : {chat_history}[/INST]
"""
prompt = PromptTemplate(template=template, input_variables=["question", "chat_history"])
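
# Inspect the exact prompt string the model will receive (the sample inputs
# here are illustrative only):
print(prompt.format(question="What is a language model?", chat_history=""))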

"""### Create an llm chain"""

llm_chain = LLMChain(prompt=prompt, llm=llm)

"""### Try invoking the LLM, with a simple chain"""

def ask_me_chat_completions(query, chat_history, llm_chain):
    response = llm_chain.run({"question":query,"chat_history":chat_history})
    return response
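
# Example call with an empty chat history (the question is illustrative):
print(ask_me_chat_completions("Why is the sky blue?", "", llm_chain))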

"""### Create a UI for more interactive conversation!

#### **Excercise **
1. Can you make the UI look better ?
2. Can you include more customizations (for example, setting up something like OpenAI playground ? Check out this link for how it looks like -> https://platform.openai.com/playground?mode=chat)
3. Host your application by creating space on HuggingFaceHub






"""

import gradio as gr

# Initialize chat history
chat_history = []

def chat_interface(query):
    global chat_history
    if query:  # Ensure the query is not empty
        # Join the stored turns so the prompt receives readable text rather
        # than the repr of a Python list.
        history_text = "\n".join(chat_history)
        response = ask_me_chat_completions(query, history_text, llm_chain)
        formatted_response = f"Q: {query}\nA: {response}"  # Format the response
        chat_history.append(formatted_response)  # Append this turn to the history
        return formatted_response  # Return only the latest Q&A for display
    return ""  # Return an empty string if the query is empty

# Create the Gradio interface
with gr.Blocks() as demo:
    with gr.Tab("Chat"):
        query_input = gr.Textbox(label="Ask your questions here", placeholder="Type your question and press submit...")
        submit_button = gr.Button("Submit")
        response_output = gr.Textbox(label="Response", interactive=False, lines=6, value="", placeholder="Your answer will appear here...")

    submit_button.click(chat_interface, inputs=query_input, outputs=response_output)

# Launch the Gradio app
demo.launch()
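
# Tip: to get a temporary public URL (handy before moving to a Hugging Face
# Space, as in the exercise above), launch with demo.launch(share=True).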