# gradioSCB / Utility.py
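"""Utility helpers for the SBC Gradio chatbot: intent classification, entity
extraction, describe/compare answer generation and a RAG fallback."""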
import openai
import langchain
import os
import json
from pprint import pprint
import pinecone
import time
from langchain.chat_models import AzureChatOpenAI
from openai.embeddings_utils import get_embedding, cosine_similarity
import tiktoken
from langchain.vectorstores import Pinecone
from langchain.chains import RetrievalQA
from langchain.chains import ConversationalRetrievalChain, ConversationChain
from langchain.memory import ConversationBufferWindowMemory
from typing import Optional
import pandas as pd
import ast
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
AIMessagePromptTemplate,
HumanMessagePromptTemplate,
)
# from langchain.chains.openai_functions import (
# create_openai_fn_chain,
# create_structured_output_chain,
# )
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.chat_models import ChatOpenAI
# from langchain.prompts import chat_prompt
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.vectorstores import FAISS
from tqdm.autonotebook import tqdm
# Azure OpenAI configuration; the same values are exported as environment
# variables for the langchain clients used below.
openai.api_type = "azure"
openai.api_base = "https://di-sandbox-gpt4.openai.azure.com/"
openai.api_version = "2023-07-01-preview"
openai.api_key = "69ec3919a7314784be9c4f7414286fba"
os.environ['OPENAI_API_KEY'] = openai.api_key
os.environ['OPENAI_API_BASE'] = openai.api_base
os.environ['OPENAI_API_VERSION'] = openai.api_version
os.environ['OPENAI_API_TYPE'] = openai.api_type
# Pinecone configuration for the vector store used by set_vector_store().
PINECONE_API_KEY = '49e9d57f-ca7b-45d8-9fe5-b02db54b2dc7'
pinecone.init(
    api_key=os.environ.get('PINECONE_API_KEY') or PINECONE_API_KEY,
    environment=os.environ.get('PINECONE_ENVIRONMENT') or 'gcp-starter'
)
def MasterLLM(Role, FewShotExamples, UserPrompt):
    """Call the Azure OpenAI chat deployment with a system role, few-shot examples and the user prompt."""
    Role = [{"role": "system", "content": Role}]
    UserPrompt = [{"role": "user", "content": UserPrompt}]
    message = [*Role, *FewShotExamples, *UserPrompt]
    response = openai.ChatCompletion.create(
        engine="GPT4_32k",
        messages=message,
        temperature=0,
        # max_tokens=350,
        # top_p=0.95,
        # frequency_penalty=0,
        # presence_penalty=0,
        # stop=None
    )
    return response['choices'][0]['message']['content']
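# Minimal usage sketch (assumes the "GPT4_32k" Azure deployment configured above
# is reachable; the prompts are hypothetical):
#   reply = MasterLLM("You are a helpful assistant.", [], "What does SBC offer?")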
def intent_recognition(UserPrompt):
    """Classify the user prompt into 'Describe', 'Compare', 'Transfer' or 'Others'."""
    IntentRole = "You are an expert intent classifier. Your job is to understand the user input and classify it into one of the following: 'Describe', 'Compare', 'Others' or 'Transfer', as per the given examples. Do not explain."
IntentExamples = [
{"role": "user", "content": "What is the difference between step X and step y for a opening a XYZ?"},
{"role": "assistant", "content": "Compare"},
{"role": "user", "content": "Compare differences between the following services"},
{"role": "assistant", "content": "Compare"},
{"role": "user", "content": "Differentiate the following services on the basis of price"},
{"role": "assistant", "content": "Compare"},
{"role": "user", "content": "Which of the follwing is better"},
{"role": "assistant", "content": "Compare"},
{"role": "user", "content": "Explain the difference between pricing of service A and B"},
{"role": "assistant", "content": "Compare"},
{"role": "user", "content": "Why should I choose service A over service B"},
{"role": "assistant", "content": "Compare"},
{"role": "user", "content": "Weigh the following services on their requirements"},
{"role": "assistant", "content": "Compare"},
{"role": "user", "content": "Service A vs Service B"},
{"role": "assistant", "content": "Compare"},
{"role": "user", "content": "Describe requirements for the following service"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "Tell me about A"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "Tell me about wholesale of software and retail sale of software"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "How can I appply for the following service"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "What are the conditions for A"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "How much is the price to apply for the following service"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "Explain the requirements of service A"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "Define the important characteristics of the following service"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "Difference between the following services"},
{"role": "assistant", "content": "Compare"},
{"role": "user", "content": "How to do service A"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "How much time to get A for License B"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "Can you help me with XYZVS"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "Please help me with XYZVS"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "How to enquire service XYZVS"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "How to access the service XYZVS"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "How to get information on service XYZVS"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "How to find about service XYZVS"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "Provide info on something"},
{"role": "assistant", "content": "Describe"},
{"role": "user", "content": "Can you send this to me"},
{"role": "assistant", "content": "Transfer"},
{"role": "user", "content": "Send it to my number"},
{"role": "assistant", "content": "Transfer"},
{"role": "user", "content": "This is my number XXXXXXXXX Send it to me"},
{"role": "assistant", "content": "Transfer"},
{"role": "user", "content": "Transfer it to my nummber"},
{"role": "assistant", "content": "Transfer"},
{"role": "user", "content": "Send this to me on Whatsapp"},
{"role": "assistant", "content": "Transfer"},
{"role": "user", "content": "I want to see this on my phone"},
{"role": "assistant", "content": "Transfer"},
{"role": "user", "content": "Whatsapp it to me"},
{"role": "assistant", "content": "Transfer"},
{"role": "user", "content": "WhatsApp it to me please"},
{"role": "assistant", "content": "Transfer"}
]
intent = MasterLLM(IntentRole, IntentExamples, UserPrompt)
return intent
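# Minimal usage sketch (the query is hypothetical; with temperature 0 and the
# few-shot examples above the expected label is "Compare"):
#   intent_recognition("Compare the fees of license A and license B")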
def entity_recognition_main(UserPrompt, LicenseName):
    """Extract the license names from LicenseName that are relevant to the user prompt."""
    entity = []
    try:
        EntityRole = f"You are a Named Entity Recognition expert. You can find relevant entities from a list based on the given user prompt. Following is a list of license names: {LicenseName}. Extract the license names from this list that are relevant to the User Prompt, matching on synonyms as well. Strictly make sure every returned entity is an exact match of a license name from the provided list, not an entity copied from the User Prompt. Remove duplicate entities from the response. The answer should only be in a Python list format; if there is no match, return an empty list."
EntityExamples = []
entity = ast.literal_eval(MasterLLM(EntityRole, EntityExamples, UserPrompt))
    except Exception:
        # The model call failed or did not return a valid Python list.
        entity = []
entity = list(set(entity))
return entity
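# Minimal usage sketch (license names are hypothetical):
#   entity_recognition_main("How do I get a commercial license?",
#                           ["Commercial License", "Industrial License"])
#   # expected: ["Commercial License"]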
def entity_recognition_sub(UserPrompt, LicenseFetched, License_Service):
    """Extract the service names relevant to the user prompt, restricted to the fetched licenses."""
    entity = []
    Final = License_Service[License_Service['License Name'].isin(LicenseFetched)]
    # print(Final)
    try:
        EntityRole = f"You are a Named Entity Recognition expert. You can find relevant entities from a list based on the given user prompt. Using the following data: {Final}. Extract the service names from this data that are relevant to the User Prompt, matching on synonyms as well (for example, 'service fees' as 'price'). Strictly make sure every returned entity is an exact match of a service name from the provided data, not an entity copied from the User Prompt. Remove duplicate entities from the response. The answer should only be in a Python list format; if there is no match, return an empty list."
EntityExamples = []
entity = ast.literal_eval(MasterLLM(EntityRole, EntityExamples, UserPrompt))
    except Exception:
        # The model call failed or did not return a valid Python list.
        entity = []
entity = list(set(entity))
return entity
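# Minimal usage sketch. License_Service is assumed to be a pandas DataFrame with
# a 'License Name' column plus service columns, loaded by the calling app:
#   entity_recognition_sub("What is the issuance fee?",
#                          ["Commercial License"], License_Service)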
def describe(entity, final_json, UserPrompt):
    """Answer a 'Describe' intent for the given entities using the retrieved data."""
    DescribeRole = "You are an online assistant for the Saudi Business Centre. You will provide concise, to-the-point answers to the user's questions. Ensure answers are in bullets only. Do not assume anything while providing output. Make sure the tone is formal and friendly, and phrase the answer like a conversation, with filler words. Ensure the answers are helpful and user friendly. Do not give a detailed answer unless the user specifically asks for one, strictly a maximum of four lines, but make sure the short answer covers the crux of all entities in the given context as per the user prompt."
    DescribeExamples = []
    UserPrompt = f"Describe {entity} based on the following data {final_json}, as per the following question: {UserPrompt}."
    final_text = MasterLLM(DescribeRole, DescribeExamples, UserPrompt)
    return final_text
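# Minimal usage sketch (the entity is hypothetical; final_json is the retrieved
# record for that entity):
#   describe(["Commercial License"], final_json, "What are the requirements?")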
def compare(entity, final_json, UserPrompt):
    """Answer a 'Compare' intent across the given entities using the retrieved data."""
    CompareRole = "You are an online assistant for the Saudi Business Centre. You will provide concise, to-the-point answers to the user's questions. Ensure the answers are in bullets only. Do not assume anything while providing output. Make sure the tone is formal and friendly, and phrase the answer like a conversation, with filler words. Ensure the answers are helpful and user friendly. Do not give a detailed answer unless the user specifically asks for one, strictly a maximum of four lines, but make sure the short answer covers the crux of all entities in the given context as per the user prompt."
    CompareExamples = []
    UserPrompt = f"Compare the following {entity} on their following data {final_json} as per the following question {UserPrompt}"
    final_text = MasterLLM(CompareRole, CompareExamples, UserPrompt)
    return final_text
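# Minimal usage sketch (entities and data are hypothetical):
#   compare(["License A", "License B"], final_json, "Which one is cheaper?")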
def RAG(UserPrompt, entity, qa_chain_for_rag):
    """Answer everything else through retrieval-augmented generation over the vector store."""
    listToStr = ', '.join(str(elem) for elem in entity)
    prompt = """You are an online assistant for the Saudi Business Centre. You will provide concise, to-the-point answers to the user's questions according to the context provided. Ensure the answers are in bullets only. Do not assume anything while providing output. Make sure the tone is formal and friendly, and make sure you are able to answer generic questions from the provided data. Answer the question given the information in those contexts. Apart from questions related to licenses and services, you can also use the information in the data to answer user queries about businesses, investments and how SBC can help them achieve the same. If you cannot find the answer to the question, say "I don't know". Answer the following user query. Do not give a detailed answer unless the user specifically asks for one, strictly a maximum of four lines, but make sure the short answer covers the crux of all entities in the given context as per the user prompt."""
    if len(entity) != 0:
        UserPrompt = UserPrompt + f" using the following licenses {listToStr}"
    final_text = qa_chain_for_rag.run(prompt + ":" + UserPrompt)
    return final_text
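# The qa_chain_for_rag argument above is built by the calling app, not in this
# module. The helper below is only a minimal sketch of how such a chain could be
# assembled from pieces already imported here (AzureChatOpenAI, RetrievalQA and
# the Pinecone store from set_vector_store); the index name is an assumption and
# the deployment name is taken from MasterLLM above.
def build_qa_chain_for_rag(index_name="sbc-index"):
    # Chat model pointed at the Azure deployment used elsewhere in this file.
    llm = AzureChatOpenAI(deployment_name="GPT4_32k", temperature=0)
    # Pinecone-backed vector store (created lazily by set_vector_store).
    vectorstore = set_vector_store(index_name)
    # "Stuff" all retrieved chunks into a single prompt; RAG() then drives the
    # chain via qa_chain_for_rag.run(...).
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(),
    )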
def set_vector_store(index_name):
    """Create the Pinecone index if it does not exist and return the vector store used for RAG."""
    embedding_model = OpenAIEmbeddings(
        openai_api_key=openai.api_key,
        deployment="text-embedding-ada-002",
        model="text-embedding-ada-002",
        openai_api_base="https://di-sandbox-gpt4.openai.azure.com/",
        openai_api_type="azure"
    )
    # Embed a placeholder string only to determine the embedding dimension for
    # index creation; the document upsert itself is handled separately (see the
    # commented-out batch upsert below).
    docs = ["Combined_data_final_V2.csv"]
    embeddings = embedding_model.embed_documents(docs)
    text_field = 'text'  # metadata field that contains the text content
if index_name not in pinecone.list_indexes():
pinecone.create_index(
index_name,
dimension=len(embeddings[0]),
metric='cosine'
)
# wait for index to finish initialization
while not pinecone.describe_index(index_name).status['ready']:
time.sleep(1)
index = pinecone.Index(index_name)
# time.sleep(180)
# batch_size = 32
# for i in range(0, len(data), batch_size):
# i_end = min(len(data), i+batch_size)
# batch = data.iloc[i:i_end]
# ids = [f"{x['id']}" for i, x in batch.iterrows()]
# texts = [x['RowAsJSON'] for i, x in batch.iterrows()]
# embeds = embedding_model.embed_documents(texts)
# # get metadata to store in Pinecone
# metadata = [
# {'text': x['RowAsJSON']} for i, x in batch.iterrows()
# ]
# print(metadata)
# # add to Pinecone
# index.upsert(vectors=zip(ids, embeds, metadata))
# time.sleep(180)
vectorstore = Pinecone(index, embedding_model.embed_query, text_field)
return vectorstore
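# Minimal usage sketch (the index name and query are assumptions, not taken from
# this file):
#   vectorstore = set_vector_store("sbc-index")
#   hits = vectorstore.similarity_search("commercial license requirements", k=3)
#   for doc in hits:
#       print(doc.page_content)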
def sentiment(UserPrompt, PrevEntity):
    """Decide whether the prompt needs prior conversational context and, if so, fold the previous entities into it."""
    MemoryRole = "You are a sentiment analysis assistant. You analyze the user's prompt and decide whether it requires conversational context or not. If the user prompt requires memory addition, answer 'Memory'; otherwise answer 'Direct'. Make sure to focus on finding pronouns like That, Those, Them, Their."
    MemoryExamples = [
{"role": "user", "content": "Can you share more details about the above license?"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "Is the above better than license AAAAA"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "Share details for all of the above"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "Do above licenses require anything"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "Can you Tell me more about this"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "Can you tell me about their prices/fees"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "Tell me more about it in detail"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "How is it better than service BB?"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "What is the price for it"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "How can I get that license"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "Are there any alternatives to this ?"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "Is there anything additional I should know"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "Can you explain to me further ?"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "What are the key differences between those?"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "Do I XXXXXX for this?"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "Can you elaborate more"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "How long does it take"},
{"role": "assistant", "content": "Memory"},
{"role": "user", "content": "How much time will it take?"},
{"role": "assistant", "content": "Memory"}
]
if_memory = MasterLLM(MemoryRole, MemoryExamples, UserPrompt)
print(if_memory)
print()
    listToStr = ', '.join(str(elem) for elem in PrevEntity)
    if if_memory.strip() == 'Direct' or len(PrevEntity) == 0:
        FullPrompt = UserPrompt
        PrevEntity = []
    else:
        FullPrompt = f"{UserPrompt}, where the previous conversation had been about {listToStr}"
    return FullPrompt, PrevEntity
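# Minimal usage sketch (license name is hypothetical; assumes the classifier
# answers 'Memory' for this prompt):
#   FullPrompt, PrevEntity = sentiment("What is the price for it?", ["Commercial License"])
#   # FullPrompt -> "What is the price for it?, where the previous conversation
#   #                had been about Commercial License"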