|
|
|
"""rag_petco_demo.ipynb |
|
|
|
Automatically generated by Colab. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/18_KRvjMD1LU1vtBRMwbi91xSzuSKOtKI |
|
""" |
|
|
|
|
|
|
|
|
|
"""## Imports""" |
|
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain_community.embeddings import GPT4AllEmbeddings |
|
from langchain_community.vectorstores import FAISS |
|
from langchain_openai import OpenAIEmbeddings
|
from langchain.document_loaders import TextLoader |
|
from langchain.document_loaders import CSVLoader |
|
from langchain.vectorstores import Chroma |
|
from langchain.chains import RetrievalQA |
|
from langchain.chains import VectorDBQA |
|
from langchain import PromptTemplate |
|
from langchain_openai import OpenAI |
|
from time import time |
|
import pandas as pd |
|
import numpy as np |
|
import getpass |
|
import re |
|
import os |
|
|
|
|
|
|
|
|
|
# Never hard-code API keys; prompt for the key instead (or set it in the environment).
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter your OpenAI API key: ')
|
|
|
|
|
final_df = pd.read_csv('petco_rag_df.csv') |
|
|
|
final_df.head(2) |
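
# `vision_description` is used in the next cells but is never loaded in this export.
# A minimal sketch, assuming the GPT-generated image descriptions were saved to a CSV
# keyed by CAMPAIGN_NAME; the filename below is hypothetical.
vision_description = pd.read_csv('petco_vision_description.csv')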
|
|
|
|
|
|
|
columns_to_drop = ['DAY_ID','SENDS','OPENS','OPEN_RATE','CLICKS','CTR','UNSUBSCRIBE_RATE'] |
|
vision_description = vision_description.drop(columns=columns_to_drop) |
|
|
|
vision_description.head(2) |
|
|
|
merged_final_df = pd.merge(final_df, vision_description, on='CAMPAIGN_NAME', how='inner') |
|
|
|
merged_final_df.head(2) |
|
|
|
merged_final_df = merged_final_df.drop('Unnamed: 0', axis=1) |
|
|
|
|
|
|
|
"""## Vectorstore Setup |
|
|
|
Chroma is used as the vector store.
|
|
|
#### Initial setup |
|
|
|
You don't need to run this cell; it builds the Chroma vector store from the merged dataframe and persists it to disk. A sketch is kept below for reference.
|
""" |
|
|
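# A minimal sketch of the cell that originally built the Chroma store, kept for
# reference only (the persisted database is simply loaded in the next section).
# It assumes one document per row of merged_final_df; how the row text is serialized
# and which metadata is kept are assumptions, not the notebook's original choices.
from langchain_core.documents import Document

docs = [
    Document(
        page_content=" | ".join(f"{col}: {row[col]}" for col in merged_final_df.columns),
        metadata={"CAMPAIGN_NAME": row["CAMPAIGN_NAME"]},
    )
    for _, row in merged_final_df.iterrows()
]

embedding = OpenAIEmbeddings(model='text-embedding-3-large')
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    persist_directory='/content/drive/Shareddrives/RAG_SYS_PETCO/data/data/chromadb',
)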
|
|
"""#### Load predefined chroma""" |
|
|
|
persist_directory = '/content/drive/Shareddrives/RAG_SYS_PETCO/data/data/chromadb' |
|
|
|
embedding = OpenAIEmbeddings(model='text-embedding-3-large') |
|
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding) |
|
retriever = vectordb.as_retriever(search_type="similarity",search_kwargs={"k":50}) |
|
|
|
"""## MultiQueryRetriever |
|
|
|
""" |
|
|
|
from langchain.retrievers.multi_query import MultiQueryRetriever |
|
from langchain_openai import ChatOpenAI |
|
|
|
llm = ChatOpenAI(model_name="gpt-4-turbo",temperature=0) |
|
retriever_from_llm = MultiQueryRetriever.from_llm( |
|
retriever=vectordb.as_retriever(search_type="mmr", |
|
search_kwargs={"k":50}), |
|
llm=llm |
|
) |
|
|
|
import logging |
|
|
|
logging.basicConfig() |
|
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO) |
|
|
|
question = """
I have promotions for May 2024 for cats and dogs. Customers get a 30% discount on select cat and dog items.
Can you generate an email subject and body that would result in a high CTR from customers?
"""
|
|
|
docs = retriever_from_llm.get_relevant_documents(query=question) |
|
len(docs) |
|
|
|
docs[:5] |
|
|
|
docs[2].page_content |
|
|
|
from langchain.prompts import PromptTemplate |
|
from langchain.chains import LLMChain |
|
|
|
qa_prompt = PromptTemplate(
    input_variables=['query', 'context'],
    template="""
    You are a recommendation system that analyzes the user's interests and
    generates an email subject and body for PETCO. If the question cannot be
    answered using the information provided, answer with 'I don't know'.

    Context: {context}

    Question: {query}
    """,
)
|
|
|
qa_chain = LLMChain(llm=llm, prompt=qa_prompt) |
|
|
|
out = qa_chain.invoke( |
|
input={ |
|
"query": question, |
|
"context": "\n---\n".join([d.page_content for d in docs]) |
|
} |
|
) |
|
print(out["text"]) |
|
|
|
question = """
I have a promotion for cats and dogs. Can you generate a description that can be used for image generation?
"""
|
|
|
docs = retriever_from_llm.get_relevant_documents(query=question) |
|
|
|
out = qa_chain.invoke( |
|
input={ |
|
"query": question, |
|
"context": "\n---\n".join([d.page_content for d in docs]) |
|
} |
|
) |
|
print(out["text"]) |
|
|
|
# The OpenAI client is created here so this cell can run on its own
# (it is created again in the Dall-E integration section below).
from openai import OpenAI
client = OpenAI()

response = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[
        {"role": "system", "content": "You are an assistant for PETCO that generates DALL-E prompts for images in promotional emails. Return only the prompt, without explanation."},
        {"role": "user", "content": "I need to generate images for a cat and dog sale. Can you create a prompt for the image?"}
    ]
)

response.choices[0].message.content
|
|
|
"""## Dall-E integration""" |
|
|
|
from PIL import Image |
|
import requests |
|
|
|
from openai import OpenAI |
|
client = OpenAI() |
|
|
|
response = client.images.generate( |
|
model="dall-e-3", |
|
prompt="""The image shows a woman brushing the teeth of a tricolor dog, likely an Australian Shepherd, in a bathroom setting. She is smiling and appears to be gently handling the dog, who is standing and looking upwards. The image also features a play button, indicating it is a clickable video link, and text below the play button stating "Watch the video (3:04)," suggesting the video is 3 minutes and 4 seconds long.""", |
|
size="1024x1024", |
|
quality="standard", |
|
n=1, |
|
) |
|
|
|
image_url = response.data[0].url |
|
|
|
im = Image.open(requests.get(image_url, stream=True).raw) |
|
im |
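
# Optional glue for the image pipeline: generate the DALL-E prompt with the chat model
# and feed it straight into the image endpoint, instead of pasting a description by hand
# as above. A minimal sketch; the messages and variable names here are illustrative.
prompt_response = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[
        {"role": "system", "content": "You generate concise DALL-E prompts for PETCO promotional emails. Return only the prompt."},
        {"role": "user", "content": "Create an image prompt for a 30% off cat and dog sale."},
    ],
)
dalle_prompt = prompt_response.choices[0].message.content

image_response = client.images.generate(
    model="dall-e-3",
    prompt=dalle_prompt,
    size="1024x1024",
    quality="standard",
    n=1,
)
Image.open(requests.get(image_response.data[0].url, stream=True).raw)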
|
|
|
"""# Experiments - Don't run""" |
|
|
|
from openai import OpenAI |
|
|
|
client = OpenAI() |
|
|
|
def generate_query():
    # Ask the model to produce a single synthetic user query for the recommendation system.
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are an employee at PETCO. Your job is to create a query to ask the recommendation system. The query needs a promotion percentage between 5% and 50%, a month in 2024 between May and December, and an animal such as cat, dog, reptile, bird, or fish. Generate only one query, without explanation."},
            {"role": "user", "content": "Can you create a prompt such as 'I have promotions for May 2024 for cats and dogs. Customers get a 30% discount on select cat and dog items. Can you generate an email subject and body that would result in a high CTR from customers?' I want the discount rate, animals, and month to be different in each prompt. Do not generate the email; only create a single prompt."}
        ]
    )
    return response.choices[0].message.content
|
|
|
|
|
|
generate_query() |
|
|
|
for i in range(0, 149): |
|
print("=================== Start ===================") |
|
print(f'{i+1} iteration') |
|
question = generate_query() |
|
print("Question:") |
|
print(question) |
|
print() |
|
docs = retriever_from_llm.get_relevant_documents(query=question) |
|
|
|
out = qa_chain.invoke( |
|
input={ |
|
"query": question, |
|
"context": "\n---\n".join([d.page_content for d in docs]) |
|
} |
|
) |
|
print(out["text"]) |
|
print() |
|
print("==================== End ====================") |
|
print() |
|
|
|
"""# Planning for the Final Presentation [April 26] |
|
|
|
* Additional data preprocessing
  * Include image information (GPT-generated descriptions of the images in the emails)
  * Add customer data such as purchase history or pet info (if available)

* Add image generation pipeline
  * Feed the prompt generated from the retriever into Midjourney to generate images
  * Use actual images from Petco to evaluate the quality of the generated images

* Experiments
  * Prompt engineering with few-shot examples
  * Create question sets to evaluate the robustness of the retrieval model
  * Explore hyperparameters to adjust the quality of the generation

* End-to-end pipeline (if possible)
  * Use the **sendgrid** API to automatically send emails assembled from the generated subject / body / images (see the sketch after this section)
|
|
|
|
|
""" |
|
|
|
|
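# Sketch of the planned SendGrid step, not wired into the pipeline yet. It assumes a
# SENDGRID_API_KEY environment variable and placeholder addresses; `subject` and `body`
# would come from parsing the qa_chain output above, and `image_url` from the DALL-E response.
import os

from sendgrid import SendGridAPIClient
from sendgrid.helpers.mail import Mail

def send_campaign_email(subject, body, image_url, to_email="customer@example.com"):
    message = Mail(
        from_email="promotions@petco.example.com",  # hypothetical sender address
        to_emails=to_email,
        subject=subject,
        html_content=f"<p>{body}</p><img src='{image_url}' width='512'>",
    )
    sg = SendGridAPIClient(os.environ["SENDGRID_API_KEY"])
    response = sg.send(message)
    return response.status_code  # 202 means SendGrid accepted the message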