File size: 17,762 Bytes
191a7c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
import openai
import langchain
import os
import json
from pprint import pprint
import pinecone
import time
from langchain.chat_models import AzureChatOpenAI
from openai.embeddings_utils import get_embedding, cosine_similarity
import tiktoken
from langchain.vectorstores import Pinecone
from langchain.chains import RetrievalQA
from langchain.chains import ConversationalRetrievalChain, ConversationChain
from langchain.memory import ConversationBufferWindowMemory
from typing import Optional
import pandas as pd
import ast
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
# from langchain.chains.openai_functions import (
#     create_openai_fn_chain,
#     create_structured_output_chain,
# )
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.chat_models import ChatOpenAI
# from langchain.prompts import chat_prompt
from langchain.prompts import ChatPromptTemplate
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.vectorstores import FAISS
from tqdm.autonotebook import tqdm 



# --- Service configuration ---------------------------------------------------
# NOTE(security): the API keys below are hard-coded in source control. They
# should be rotated and supplied via the environment or a secret store; the
# literals are kept here only so existing deployments keep working unchanged.
openai.api_type = "azure"
openai.api_base = "https://di-sandbox-gpt4.openai.azure.com/"
openai.api_version = "2023-07-01-preview"
openai.api_key = "69ec3919a7314784be9c4f7414286fba"

# Mirror the OpenAI settings into the process environment (presumably so that
# libraries which read these variables pick up the same configuration).
os.environ['OPENAI_API_KEY'] = openai.api_key
os.environ['OPENAI_API_BASE'] = openai.api_base
os.environ['OPENAI_API_VERSION'] = openai.api_version
os.environ['OPENAI_API_TYPE'] = openai.api_type

# Fallback Pinecone key, used only when PINECONE_API_KEY is unset/empty in the
# environment.
PINECONE_API_KEY = '49e9d57f-ca7b-45d8-9fe5-b02db54b2dc7'

pinecone.init(
    # Reuse the constant instead of repeating the literal a second time.
    api_key=os.environ.get('PINECONE_API_KEY') or PINECONE_API_KEY,
    environment=os.environ.get('PINECONE_ENVIRONMENT') or 'gcp-starter',
)


def MasterLLM(Role, FewShotExamples, UserPrompt):
    """Send one chat-completion request and return the reply text.

    The conversation sent to the model is, in order: a single system message
    carrying ``Role``, every item of ``FewShotExamples``, and a single user
    message carrying ``UserPrompt``.

    Args:
        Role: Content of the system message.
        FewShotExamples: Iterable of chat-message dicts placed between the
            system message and the user message.
        UserPrompt: Content of the final user message.

    Returns:
        The ``content`` field of the first choice in the API response.
    """
    conversation = [{"role": "system", "content": Role}]
    conversation.extend(FewShotExamples)
    conversation.append({"role": "user", "content": UserPrompt})

    # Only the temperature is pinned; every other sampling parameter is left
    # at the API default.
    completion = openai.ChatCompletion.create(
        engine="GPT4_32k",
        messages=conversation,
        temperature=0,
    )

    first_choice = completion['choices'][0]
    return first_choice['message']['content']

def intent_recognition(UserPrompt):
    """Classify a user prompt into an intent label via few-shot prompting.

    The labelled examples below are expanded into alternating user/assistant
    chat messages and passed, together with the classifier instructions and
    the prompt, to ``MasterLLM``.

    Args:
        UserPrompt: The raw user message to classify.

    Returns:
        Whatever text the model replies with (the instructions ask for one of
        'Describe', 'Compare', 'Others' or 'Transfer').
    """
    classifier_role = "You are an expert intent classifier. Your job is to understand user input and classify into either of the following 'Describe', 'Compare', 'Others' and 'Transfer' as per the given examples.Do not explain"

    # (user prompt, expected label) pairs; order is preserved when expanded.
    labelled_examples = (
        ("What is the difference between step X and step y for a opening a XYZ?", "Compare"),
        ("Compare differences between the following services", "Compare"),
        ("Differentiate the following services on the basis of price", "Compare"),
        ("Which of the follwing is better", "Compare"),
        ("Explain the difference between pricing of service A and B", "Compare"),
        ("Why should I choose service A over service B", "Compare"),
        ("Weigh the following services on their requirements", "Compare"),
        ("Service A vs Service B", "Compare"),
        ("Describe requirements for the following service", "Describe"),
        ("Tell me about A", "Describe"),
        ("Tell me about wholesale of software and retail sale of software", "Describe"),
        ("How can I appply for the following service", "Describe"),
        ("What are the conditions for A", "Describe"),
        ("How much is the price to apply for the following service", "Describe"),
        ("Explain the requirements of service A", "Describe"),
        ("Define the important characteristics of the following service", "Describe"),
        ("Difference between the following services", "Compare"),
        ("How to do service A", "Describe"),
        ("How much time to get A for License B", "Describe"),
        ("Can you help me with XYZVS", "Describe"),
        ("Please help me with XYZVS", "Describe"),
        ("How to enquire service XYZVS", "Describe"),
        ("How to access the service XYZVS", "Describe"),
        ("How to get information on service XYZVS", "Describe"),
        ("How to find about service XYZVS", "Describe"),
        ("Provide info on something", "Describe"),
        ("Can you send this to me", "Transfer"),
        ("Send it to my number", "Transfer"),
        ("This is my number XXXXXXXXX Send it to me", "Transfer"),
        ("Transfer it to my nummber", "Transfer"),
        ("Send this to me on Whatsapp", "Transfer"),
        ("I want to see this on my phone", "Transfer"),
        ("Whatsapp it to me", "Transfer"),
        ("WhatsApp it to me please", "Transfer"),
    )

    few_shot_messages = []
    for question, label in labelled_examples:
        few_shot_messages.append({"role": "user", "content": question})
        few_shot_messages.append({"role": "assistant", "content": label})

    return MasterLLM(classifier_role, few_shot_messages, UserPrompt)

def entity_recognition_main(UserPrompt, LicenseName):
    """Extract license names referenced by a user prompt.

    The model is instructed to answer with a Python list literal containing
    entries from ``LicenseName``; the reply is parsed with
    ``ast.literal_eval``. Extraction is best-effort: any failure to obtain or
    parse a usable reply yields an empty list instead of raising.

    Args:
        UserPrompt: The user message to extract entities from.
        LicenseName: Collection of known license names, interpolated into the
            system prompt.

    Returns:
        A list of distinct entities in the order the model returned them, or
        ``[]`` when the request fails, the reply is not a list/tuple/set
        literal, or its items cannot be de-duplicated.
    """
    EntityRole = f"You are a Named Entity Recognition expert. You can find relevant entities from a list based on the mentioned user prompt. Following is a list of license names: {LicenseName}. Extract relevant entities from the list of license names based on User Prompt.Extract exact license names from the list matching the extracted entities. Make sure the extracted entities are present in the given list of license names. Strictly make sure that output includes the entity from the given list of license name and not the entity from User Prompt. Answer should only be in a python list format. Make sure to match entities on synonyms as well. Remove duplicate entities from the response. If there is no match, print blank list. Again, strictly make sure the entity returned must be an exact match of license names in the provided list"
    EntityExamples = []

    try:
        parsed = ast.literal_eval(MasterLLM(EntityRole, EntityExamples, UserPrompt))
    except Exception:
        # Deliberately best-effort (API errors, malformed literals, ...), but
        # unlike a bare ``except`` this lets KeyboardInterrupt/SystemExit through.
        return []

    # A bare string or number is a valid literal too; iterating it would
    # produce characters (or fail), so only accept real collections.
    if not isinstance(parsed, (list, tuple, set)):
        return []

    try:
        # dict.fromkeys de-duplicates while keeping first-seen order.
        return list(dict.fromkeys(parsed))
    except TypeError:
        # Unhashable items (e.g. nested lists) mean the reply was not a flat
        # list of names.
        return []


def entity_recognition_sub(UserPrompt, LicenseFetched,License_Service):
    """Extract service names referenced by a user prompt for given licenses.

    ``License_Service`` is first narrowed to the rows whose 'License Name' is
    in ``LicenseFetched``; that subset is interpolated into the system prompt
    and the model is asked to reply with a Python list literal, which is
    parsed with ``ast.literal_eval``. Extraction is best-effort: any failure
    to obtain or parse a usable reply yields an empty list.

    Args:
        UserPrompt: The user message to extract entities from.
        LicenseFetched: License names used to filter ``License_Service``.
        License_Service: Table with a 'License Name' column that supports
            ``.isin`` boolean-mask filtering (e.g. a pandas DataFrame).

    Returns:
        A list of distinct entities in the order the model returned them, or
        ``[]`` when the request fails, the reply is not a list/tuple/set
        literal, or its items cannot be de-duplicated.
    """
    Final = License_Service[License_Service['License Name'].isin(LicenseFetched)]

    EntityRole = f"You are a Named Entity Recognition expert. You can find relevant entities from a list based on the mentioned user prompt. Using the following data: {Final}. Extract relevant entities from the list of service names based on User Prompt.Extract exact service names from the list matching the extracted entities. Make sure the extracted entities are present in the given list of service names. Strictly make sure that output includes the entity from the given list of service name and not the entity from User Prompt. Answer should only be in a python list format. Make sure to match entities on synonyms as well, like service fees as price. Remove duplicate entities from the response. If there is no match, print blank list. Again, strictly make sure the entity returned must be an exact match of service names in the provided list"
    EntityExamples = []

    try:
        parsed = ast.literal_eval(MasterLLM(EntityRole, EntityExamples, UserPrompt))
    except Exception:
        # Deliberately best-effort (API errors, malformed literals, ...), but
        # unlike a bare ``except`` this lets KeyboardInterrupt/SystemExit through.
        return []

    # A bare string or number is a valid literal too; iterating it would
    # produce characters (or fail), so only accept real collections.
    if not isinstance(parsed, (list, tuple, set)):
        return []

    try:
        # dict.fromkeys de-duplicates while keeping first-seen order.
        return list(dict.fromkeys(parsed))
    except TypeError:
        # Unhashable items (e.g. nested lists) mean the reply was not a flat
        # list of names.
        return []

def describe(entity, final_json, UserPrompt):
    """Ask the model to describe the given entities from the supplied data.

    Args:
        entity: Entities to describe; interpolated into the request text.
        final_json: Data the answer should be based on; interpolated as-is.
        UserPrompt: The user's original question.

    Returns:
        The model's reply text as returned by ``MasterLLM``.
    """
    assistant_role = "You are an online assistant for Saudi Business Centre. You will provide concise and to the point answers to the user questions. Ensure answers are in bullets only. Do not assume anything while providing output. Make sure the tone is formal and friendly. Make sure put the answer like a conversation with filler words. Ensure that the answers are in a helpful tone and user friendly.Make sure the answer is not detailed unless asked specifically by the user,strictly a maximum of four lines. But make sure the short answer must cover the crux of all entities in the given context as per user prompt"

    # No few-shot examples are used for this task.
    request = f"Describe {entity} on the following data {final_json} as per the following question {UserPrompt}."

    return MasterLLM(assistant_role, [], request)

def compare(entity, final_json, UserPrompt):
    """Ask the model to compare the given entities using the supplied data.

    Args:
        entity: Entities to compare; interpolated into the request text.
        final_json: Data the comparison should be based on; interpolated as-is.
        UserPrompt: The user's original question.

    Returns:
        The model's reply text as returned by ``MasterLLM``.
    """
    # The role is a plain string that is never formatted, so the former
    # trailing "Provide answer to the following: {context}" sentence reached
    # the model with a literal, unfilled placeholder. It has been removed; the
    # actual data and question are supplied in the user message below.
    CompareRole = "You are an online assistant for Saudi Business Centre. You will provide concise and to the point answers to the user questions. Ensure the answers are in bullets only. Do not assume anything while providing output. Make sure the tone is formal and friendly. Make sure put the answer like a conversation with filler words. Make sure the answer is not detailed unless asked specifically by the user, strictly a maximum of four lines. But make sure the short answer must cover the crux of all entities in the given context as per user prompt Ensure that the answers are in a helpful tone and user friendly."
    CompareExamples = []
    UserPrompt = f"Compare the following {entity} on their following data {final_json} as per the following question {UserPrompt}"

    return MasterLLM(CompareRole, CompareExamples, UserPrompt)

def RAG(UserPrompt,entity,qa_chain_for_rag):
    """Run a retrieval-QA chain on the user prompt with fixed instructions.

    The text passed to the chain is the instruction block, a ':' separator,
    and the user prompt. When ``entity`` is non-empty, the prompt is first
    suffixed with a comma-separated list of those entities.

    Args:
        UserPrompt: The user's question.
        entity: Sized iterable of license names; may be empty.
        qa_chain_for_rag: Object exposing ``run(text)``.

    Returns:
        Whatever ``qa_chain_for_rag.run`` returns.
    """
    instructions = """You are an online assistant for Saudi Business Centre. You will provide concise and to the point answers to the user questions according to the context provided. Ensure the answers are in bullets only. Do not assume anything while providing output. Make sure the tone is formal and friendly.Make sure you're able to answer generic questions from the provided data. Answer the question given the information in those
    contexts. Apart from questions related to licenses and services, you can also use information from data to answer user queries about businesses, investments and how SBC can help them achieve the same. If you cannot find the answer to the question, say "I don't know".Answer the following User query. Make sure the answer is not detailed unless asked specifically by the user, strictly a maximum of four lines. But make sure the short answer must cover the crux of all entities in the given context as per user prompt"""

    license_names = ', '.join(map(str, entity))

    question = UserPrompt
    if len(entity) != 0:
        question = question + " using the following licenses " + license_names

    return qa_chain_for_rag.run(instructions + ":" + question)

def set_vector_store(index_name):
    """Return a LangChain ``Pinecone`` vector store bound to ``index_name``.

    If the index does not exist yet it is created with the cosine metric and a
    dimension taken from a probe embedding, and the call blocks (polling once
    per second) until Pinecone reports the index as ready.

    NOTE: this function does not insert any documents. An earlier, disabled
    revision upserted rows of a ``data`` table in batches of 32 (ids from the
    'id' column, text from 'RowAsJSON' stored under the 'text' metadata key);
    the index is expected to be populated by other means.

    Args:
        index_name: Name of the Pinecone index to open (and create if absent).

    Returns:
        A ``Pinecone`` vector store that embeds queries with
        ``embedding_model.embed_query`` and reads document text from the
        'text' metadata field.
    """
    embedding_model = OpenAIEmbeddings(
        openai_api_key=openai.api_key,
        deployment="text-embedding-ada-002",
        model="text-embedding-ada-002",
        openai_api_base="https://di-sandbox-gpt4.openai.azure.com/",
        openai_api_type="azure",
    )

    text_field = 'text'  # field in metadata that contains text content

    if index_name not in pinecone.list_indexes():
        # The probe embedding is needed only to discover the vector dimension,
        # so the embeddings API is called solely when an index must be created
        # (previously it was called on every invocation).
        probe_vector = embedding_model.embed_documents(["Combined_data_final_V2.csv"])[0]
        pinecone.create_index(
            index_name,
            dimension=len(probe_vector),
            metric='cosine'
        )
        # wait for index to finish initialization
        while not pinecone.describe_index(index_name).status['ready']:
            time.sleep(1)

    index = pinecone.Index(index_name)

    return Pinecone(index, embedding_model.embed_query, text_field)

def sentiment(UserPrompt, PrevEntity):
    """Decide whether a prompt needs prior context and build the full prompt.

    A few-shot classifier is asked to answer 'Memory' (the prompt refers back
    to earlier conversation) or 'Direct'. When the reply is 'Direct', or there
    are no previous entities, the prompt is returned unchanged with an empty
    entity list. Otherwise the prompt is suffixed with the previous entities.

    The raw classifier reply is printed to stdout followed by a blank line.

    Args:
        UserPrompt: The user's current message.
        PrevEntity: Sized iterable of entities from the previous turn.

    Returns:
        A ``(FullPrompt, PrevEntity)`` tuple: the possibly augmented prompt
        and the entities carried forward (``[]`` on the direct path).
    """
    MemoryRole = "You are a sentiment analysis assistant. You analyze users prompt and decide whether it requires a conversational context or not. If the user prompt requires memory addition, you will answer as 'Memory' else you will answer as 'Direct'. Make sure to focus on findind pronouns like That, Those, Them, Their"

    # Every few-shot example carries the 'Memory' label.
    memory_prompts = (
        "Can you share more details about the above license?",
        "Is the above better than license AAAAA",
        "Share details for all of the above",
        "Do above licenses require anything",
        "Can you Tell me more about this",
        "Can you tell me about their prices/fees",
        "Tell me more about it in detail",
        "How is it better than service BB?",
        "What is the price for it",
        "How can I get that license",
        "Are there any alternatives to this ?",
        "Is there anything additional I should know",
        "Can you explain to me further ?",
        "What are the key differences between those?",
        "Do I XXXXXX for this?",
        "Can you elaborate more",
        "How long does it take",
        "How much time will it take?",
    )
    MemoryExamples = []
    for example in memory_prompts:
        MemoryExamples.append({"role": "user", "content": example})
        MemoryExamples.append({"role": "assistant", "content": "Memory"})

    if_memory = MasterLLM(MemoryRole, MemoryExamples, UserPrompt)
    print(if_memory)
    print()

    # The original test was `if_memory in 'Direct'`, i.e. "is the reply a
    # substring of 'Direct'", which misclassified replies such as "Direct." or
    # "direct" as needing memory. Check for the label inside the reply
    # instead; an empty reply is still treated as direct, as before.
    verdict = if_memory.strip().lower()
    is_direct = not verdict or 'direct' in verdict

    if is_direct or len(PrevEntity) == 0:
        return UserPrompt, []

    previous_topics = ', '.join(str(elem) for elem in PrevEntity)
    FullPrompt = f"{UserPrompt}, where previous conversation had been about {previous_topics}"

    return FullPrompt, PrevEntity