File size: 5,557 Bytes
ab02de1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
from langchain.document_loaders import PyPDFLoader, OnlinePDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from sentence_transformers import SentenceTransformer
from langchain.chains.question_answering import load_qa_chain
import pinecone
import os
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from huggingface_hub import hf_hub_download
from langchain.chains.question_answering import load_qa_chain
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
import torch
from langchain.chains import LLMChain
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
import gradio as gr
import time
from transformers import pipeline
from gtts import gTTS

# Credentials are read from the environment only. An earlier revision of this
# file embedded a Hugging Face token and a Pinecone API key as literals; both
# must be treated as leaked and rotated. If a Hugging Face token is needed,
# provide HUGGINGFACEHUB_API_TOKEN through the deployment environment (it is
# no longer assigned here).
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
if not PINECONE_API_KEY:
    raise RuntimeError(
        "PINECONE_API_KEY is not set; export it (or configure it as a secret) "
        "before starting the app."
    )
# 'gcp-starter' is the environment this app was previously hard-wired to, so
# it stays the default; PINECONE_API_ENV now actually overrides it.
PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV', 'gcp-starter')

# Sentence-embedding model used to embed queries for the vector search.
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

# initialize pinecone
pinecone.init(
    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
    environment=PINECONE_API_ENV,  # next to api key in console
)
index_name = "rpl-llama"  # put in the name of your pinecone index here

# Vector store handle used by model_response(); it was referenced there but
# never created, so every query failed with NameError.
docsearch = Pinecone.from_existing_index(index_name, embeddings)
# Locate (downloading if necessary) the GGUF weights on the Hugging Face Hub.
model_name_or_path = "TheBloke/Llama-2-7b-Chat-GGUF"
model_basename = "llama-2-7b-chat.Q4_0.gguf"
model_path = hf_hub_download(
    repo_id=model_name_or_path,
    filename=model_basename,
)

# llama.cpp tuning knobs.
n_gpu_layers = 40  # adjust to the model and the GPU VRAM that is available
n_batch = 512  # keep between 1 and n_ctx; bounded by GPU VRAM as well

# Callback manager wired with the stdout streaming handler.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# The model path must be valid on the machine running this script.
llm = LlamaCpp(
    verbose=True,  # verbose is required for the callback manager to be used
    callback_manager=callback_manager,
    n_batch=n_batch,
    n_gpu_layers=n_gpu_layers,
    model_path=model_path,
)
# Prompt handed to the LLM: {docs} is filled with the retrieved context and
# {query} with the user's question (see model_response).
prompt_template="""
Use the embeddings, summarize and generate the answers to user's questions. Don't repeat sentences.


Context: {docs}
Question: {query}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

PROMPT = PromptTemplate(
    input_variables=["docs", "query"],
    template=prompt_template,
)
# Chain that formats PROMPT and runs it through the llama.cpp model.
llm_chain = LLMChain(llm=llm, prompt=PROMPT)

# Speech-recognition pipeline (Whisper small) used to transcribe microphone
# recordings into text queries.
asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
# Build the Gradio UI: chat window, a voice input/output row and a text row.
with gr.Blocks() as demo:

    # Chat history, pre-seeded with a greeting from the bot.
    chatbot = gr.Chatbot(
        avatar_images=("human.png", "bot.png"),
        value=[[None, "Welcome to the Indore-Ekk Number Superstore family! We're thrilled to have you on board. \n How can I assist you today?"]],
    )

    with gr.Row(label="Voice Input and Output"):
        with gr.Column(variant="panel"):
            # Microphone recording, delivered to handlers as a file path.
            audio_file = gr.Audio(
                label='Voice based Input',
                source="microphone",
                type="filepath",
                optional=True,
            )
        with gr.Column(variant="panel"):
            # Player for the synthesized answer.
            play_audio = gr.Audio(label='Output Audio', autoplay=True)

    # Hidden textbox holding the answer text; its change event drives the
    # text-to-speech step.
    audio_out = gr.Textbox(visible=False)

    with gr.Row(label="Voice Input and Output"):
        with gr.Column(label='Text Based Input', variant="panel"):
            msg = gr.Textbox(placeholder="Ask me your doubts")
        with gr.Column(variant="panel"):
            with gr.Row():
                clear = gr.Button("Clear the Chatbot Conversation")

    def text_to_speech(text):
        """Synthesize *text* to speech and point the output player at it.

        Args:
            text: The answer text to speak (English).

        Returns:
            A ``gr.Audio`` update whose value is the generated MP3 path, or
            an update clearing the player when there is nothing to speak.
        """
        # gTTS refuses empty input, so skip synthesis instead of raising
        # inside the change-event handler.
        if not text or not text.strip():
            return gr.Audio.update(value=None)

        speech = gTTS(text=text, lang='en')
        # NOTE(review): the output path is fixed, so concurrent sessions
        # presumably overwrite each other's audio - confirm whether per-request
        # file names are needed.
        speech.save('eng.mp3')
        return gr.Audio.update(value='eng.mp3')

    def user(user_message, history):
        """Handle a submitted text message.

        Stores the message in the global ``query``, generates the answer via
        ``model_response`` into the global ``fck``, and logs both to stdout.

        Returns:
            A 3-tuple: an empty string (clears the input box), the history
            extended with a ``[user_message, None]`` row, and a textbox update
            carrying the answer text.
        """
        global query, fck
        query = user_message
        fck = model_response(query)
        print(user_message, fck)
        extended_history = history + [[user_message, None]]
        answer_update = gr.Textbox.update(value=fck)
        return '', extended_history, answer_update

    def model_response(query):
        """Answer *query* from the retrieved context.

        Runs a similarity search on ``docsearch``, builds the context from up
        to the first three hits and passes it with the query to ``llm_chain``.

        Args:
            query: The user's question as plain text.

        Returns:
            The text produced by ``llm_chain.run``.
        """
        global a
        hits = docsearch.similarity_search(query)
        # Slice instead of indexing [0], [1], [2] so that fewer than three
        # hits no longer raises IndexError; separate the chunks so the last
        # word of one does not fuse with the first word of the next.
        context = "\n\n".join(hit.page_content for hit in hits[:3])
        # The global ``a`` is kept only because the original published the
        # answer under that name; nothing in this file reads it.
        a = llm_chain.run({'docs': context, 'query': query})
        return a

    def bot(history):
        """Stream the already-generated answer into the last chat row.

        The answer is produced by ``user`` / ``speech_to_text`` and stored in
        the global ``fck``; this generator only reveals it character by
        character. The original re-ran ``model_response(query)`` here and
        threw the result away, doubling the inference cost of every turn.

        Args:
            history: Chat history as a list of ``[user, bot]`` rows.

        Yields:
            The history after each appended character (or once, unchanged,
            when there is no pending user turn).
        """
        global bot_message
        # No pending user turn (empty history, or a row without a user
        # message): leave the chat untouched instead of replaying stale text.
        if not history or history[-1][0] is None:
            yield history
            return

        # NOTE(review): ``fck`` is a module global shared by all sessions, so
        # concurrent users presumably overwrite each other - confirm.
        bot_message = fck
        history[-1][1] = ""
        for character in bot_message:
            history[-1][1] += character
            time.sleep(0.05)  # typewriter effect
            yield history


    def speech_to_text(audio_file, history):
        """Transcribe a microphone recording and generate its answer.

        On success the transcript is stored in the global ``query`` and the
        answer from ``model_response`` in the global ``fck``.

        Args:
            audio_file: Path of the recorded audio, or ``None`` when nothing
                was recorded.
            history: Chat history as a list of ``[user, bot]`` rows.

        Returns:
            A 3-tuple matching the three wired outputs: ``None`` (clears the
            recorder), the extended history, and a textbox update for the
            hidden answer box.
        """
        global query
        global fck
        if audio_file is None:
            # The handler is wired to three outputs; the original returned
            # only two values here, which Gradio rejects. The answer box is
            # left as-is with a no-op update.
            return None, history + [[None, None]], gr.Textbox.update()

        text = asr(audio_file)["text"]
        query = text
        fck = model_response(query)
        print(text)
        return None, history + [[text, None]], gr.Textbox.update(value=fck)

    # Voice path: transcribe and answer when recording stops, then stream the
    # answer into the chat window.
    audio_file.stop_recording(
        speech_to_text,
        inputs=[audio_file, chatbot],
        outputs=[audio_file, chatbot, audio_out],
        queue=False,
        show_progress=False,
    ).then(bot, inputs=chatbot, outputs=chatbot)

    # Text path: same flow, triggered by submitting the textbox.
    msg.submit(
        user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot, audio_out],
        queue=False,
    ).then(bot, inputs=chatbot, outputs=chatbot)

    # Reset the chat window.
    clear.click(lambda: None, inputs=None, outputs=chatbot, queue=False)
    # Speak the answer whenever the hidden answer box changes.
    audio_out.change(text_to_speech, inputs=[audio_out], outputs=play_audio)

# Enable the request queue (needed for the streaming `bot` generator), then
# start the server in debug mode.
demo.queue().launch(debug=True)