|
|
|
"""rag_petco_demo.ipynb |
|
|
|
Automatically generated by Colab. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/18_KRvjMD1LU1vtBRMwbi91xSzuSKOtKI |
|
""" |
|
|
|
|
|
|
|
|
|
"""## Imports""" |
|
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain_community.embeddings import GPT4AllEmbeddings |
|
from langchain_community.vectorstores import FAISS |
|
from langchain_openai import OpenAIEmbeddings
|
from langchain.document_loaders import TextLoader |
|
from langchain.document_loaders import CSVLoader |
|
from langchain.vectorstores import Chroma |
|
from langchain.chains import RetrievalQA |
|
from langchain.chains import VectorDBQA |
|
from langchain import PromptTemplate |
|
from langchain_openai import OpenAI |
|
from time import time |
|
import pandas as pd |
|
import numpy as np |
|
import getpass |
|
import re |
|
import os |
|
|
|
|
|
|
|
|
|
# Never hard-code API keys; prompt for the key instead (or set it in the environment).
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter your OpenAI API key: ')
|
|
|
|
|
final_df = pd.read_csv('petco_rag_df.csv') |
|
|
|
final_df.head(2) |
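
# `vision_description` is used in the next cells but is never loaded in this export.
# A minimal sketch, assuming the GPT-generated image descriptions were saved to a CSV
# keyed by CAMPAIGN_NAME; the filename below is hypothetical.
vision_description = pd.read_csv('petco_vision_description.csv')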
|
|
|
|
|
|
|
columns_to_drop = ['DAY_ID','SENDS','OPENS','OPEN_RATE','CLICKS','CTR','UNSUBSCRIBE_RATE'] |
|
vision_description = vision_description.drop(columns=columns_to_drop) |
|
|
|
vision_description.head(2) |
|
|
|
merged_final_df = pd.merge(final_df, vision_description, on='CAMPAIGN_NAME', how='inner') |
|
|
|
merged_final_df.head(2) |
|
|
|
merged_final_df = merged_final_df.drop('Unnamed: 0', axis=1) |
|
|
|
|
|
|
|
"""## Vectorstore Setup |
|
|
|
Chroma is used as the vector store.
|
|
|
#### Initial setup |
|
|
|
You don't need to run this cell; it builds the Chroma vector store from the merged dataframe and persists it to disk. A sketch is kept below for reference.
|
""" |
|
|
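# A minimal sketch of the cell that originally built the Chroma store, kept for
# reference only (the persisted database is simply loaded in the next section).
# It assumes one document per row of merged_final_df; how the row text is serialized
# and which metadata is kept are assumptions, not the notebook's original choices.
from langchain_core.documents import Document

docs = [
    Document(
        page_content=" | ".join(f"{col}: {row[col]}" for col in merged_final_df.columns),
        metadata={"CAMPAIGN_NAME": row["CAMPAIGN_NAME"]},
    )
    for _, row in merged_final_df.iterrows()
]

embedding = OpenAIEmbeddings(model='text-embedding-3-large')
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    persist_directory='/content/drive/Shareddrives/RAG_SYS_PETCO/data/data/chromadb',
)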
|
|
"""#### Load predefined chroma""" |
|
|
|
persist_directory = '/content/drive/Shareddrives/RAG_SYS_PETCO/data/data/chromadb' |
|
|
|
embedding = OpenAIEmbeddings(model='text-embedding-3-large') |
|
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding) |
|
retriever = vectordb.as_retriever(search_type="similarity",search_kwargs={"k":50}) |
|
|
|
"""## MultiQueryRetriever |
|
|
|
""" |
|
|
|
from langchain.retrievers.multi_query import MultiQueryRetriever |
|
from langchain_openai import ChatOpenAI |
|
|
|
llm = ChatOpenAI(model_name="gpt-4-turbo",temperature=0) |
|
retriever_from_llm = MultiQueryRetriever.from_llm( |
|
retriever=vectordb.as_retriever(search_type="mmr", |
|
search_kwargs={"k":50}), |
|
llm=llm |
|
) |
|
|
|
import logging |
|
|
|
logging.basicConfig() |
|
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO) |
|
|
|
question = """
I have promotions for May 2024 for cats and dogs. Customers get a 30% discount on select cat and dog items.
Can you generate an email subject and body that would result in a high CTR from customers?
"""
|
|
|
docs = retriever_from_llm.get_relevant_documents(query=question) |
|
len(docs) |
|
|
|
docs[:5] |
|
|
|
docs[2].page_content |
|
|
|
from langchain.prompts import PromptTemplate |
|
from langchain.chains import LLMChain |
|
|
|
qa_prompt = PromptTemplate(
    input_variables=['query', 'context'],
    template="""
    You are a recommendation system that analyzes the user's interests and
    generates an email subject and body for PETCO. If the question cannot be
    answered using the information provided, answer with 'I don't know'.

    Context: {context}

    Question: {query}
    """,
)
|
|
|
qa_chain = LLMChain(llm=llm, prompt=qa_prompt) |
|
|
|
out = qa_chain.invoke( |
|
input={ |
|
"query": question, |
|
"context": "\n---\n".join([d.page_content for d in docs]) |
|
} |
|
) |
|
print(out["text"]) |
|
|
|
question = """
I have a promotion for cats and dogs. Can you generate a description that can be used for image generation?
"""
|
|
|
docs = retriever_from_llm.get_relevant_documents(query=question) |
|
|
|
out = qa_chain.invoke( |
|
input={ |
|
"query": question, |
|
"context": "\n---\n".join([d.page_content for d in docs]) |
|
} |
|
) |
|
print(out["text"]) |
|
|
|
# The OpenAI client is created here so this cell can run on its own
# (it is created again in the Dall-E integration section below).
from openai import OpenAI
client = OpenAI()

response = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[
        {"role": "system", "content": "You are an assistant for PETCO that generates DALL-E prompts for images in promotional emails. Return only the prompt, without explanation."},
        {"role": "user", "content": "I need to generate images for a cat and dog sale. Can you create a prompt for the image?"}
    ]
)

response.choices[0].message.content
|
|
|
"""## Dall-E integration""" |
|
|
|
from PIL import Image |
|
import requests |
|
|
|
from openai import OpenAI |
|
client = OpenAI() |
|
|
|
response = client.images.generate( |
|
model="dall-e-3", |
|
prompt="""The image shows a woman brushing the teeth of a tricolor dog, likely an Australian Shepherd, in a bathroom setting. She is smiling and appears to be gently handling the dog, who is standing and looking upwards. The image also features a play button, indicating it is a clickable video link, and text below the play button stating "Watch the video (3:04)," suggesting the video is 3 minutes and 4 seconds long.""", |
|
size="1024x1024", |
|
quality="standard", |
|
n=1, |
|
) |
|
|
|
image_url = response.data[0].url |
|
|
|
im = Image.open(requests.get(image_url, stream=True).raw) |
|
im |
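
# Optional glue for the image pipeline: generate the DALL-E prompt with the chat model
# and feed it straight into the image endpoint, instead of pasting a description by hand
# as above. A minimal sketch; the messages and variable names here are illustrative.
prompt_response = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[
        {"role": "system", "content": "You generate concise DALL-E prompts for PETCO promotional emails. Return only the prompt."},
        {"role": "user", "content": "Create an image prompt for a 30% off cat and dog sale."},
    ],
)
dalle_prompt = prompt_response.choices[0].message.content

image_response = client.images.generate(
    model="dall-e-3",
    prompt=dalle_prompt,
    size="1024x1024",
    quality="standard",
    n=1,
)
Image.open(requests.get(image_response.data[0].url, stream=True).raw)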
|
|
|
"""# Experiments - Don't run""" |
|
|
|
from openai import OpenAI |
|
|
|
client = OpenAI() |
|
|
|
def generate_query():
    # Ask the model to produce a single synthetic user query for the recommendation system.
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are an employee at PETCO. Your job is to create a query to ask the recommendation system. The query needs a promotion percentage between 5% and 50%, a month in 2024 between May and December, and an animal such as cat, dog, reptile, bird, or fish. Generate only one query, without explanation."},
            {"role": "user", "content": "Can you create a prompt such as 'I have promotions for May 2024 for cats and dogs. Customers get a 30% discount on select cat and dog items. Can you generate an email subject and body that would result in a high CTR from customers?' I want the discount rate, animals, and month to be different in each prompt. Do not generate the email; only create a single prompt."}
        ]
    )
    return response.choices[0].message.content
|
|
|
|
|
|
generate_query() |
|
|
|
for i in range(0, 149): |
|
print("=================== Start ===================") |
|
print(f'{i+1} iteration') |
|
question = generate_query() |
|
print("Question:") |
|
print(question) |
|
print() |
|
docs = retriever_from_llm.get_relevant_documents(query=question) |
|
|
|
out = qa_chain.invoke( |
|
input={ |
|
"query": question, |
|
"context": "\n---\n".join([d.page_content for d in docs]) |
|
} |
|
) |
|
print(out["text"]) |
|
print() |
|
print("==================== End ====================") |
|
print() |
|
|
|
"""# Planning for the Final Presentation [April 26] |
|
|
|
* Additional data preprocessing
  * Include image information (GPT-generated descriptions of the images in the emails)
  * Add customer data such as purchase history or pet info (if available)

* Add image generation pipeline
  * Feed the prompt generated from the retriever into Midjourney to generate images
  * Use actual images from Petco to evaluate the quality of the generated images

* Experiments
  * Prompt engineering with few-shot examples
  * Create question sets to evaluate the robustness of the retrieval model
  * Explore hyperparameters to adjust the quality of the generation

* End-to-end pipeline (if possible)
  * Use the **sendgrid** API to automatically send emails assembled from the generated subject / body / images (see the sketch after this section)
|
|
|
|
|
""" |
|
|
|
|
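# Sketch of the planned SendGrid step, not wired into the pipeline yet. It assumes a
# SENDGRID_API_KEY environment variable and placeholder addresses; `subject` and `body`
# would come from parsing the qa_chain output above, and `image_url` from the DALL-E response.
import os

from sendgrid import SendGridAPIClient
from sendgrid.helpers.mail import Mail

def send_campaign_email(subject, body, image_url, to_email="customer@example.com"):
    message = Mail(
        from_email="promotions@petco.example.com",  # hypothetical sender address
        to_emails=to_email,
        subject=subject,
        html_content=f"<p>{body}</p><img src='{image_url}' width='512'>",
    )
    sg = SendGridAPIClient(os.environ["SENDGRID_API_KEY"])
    response = sg.send(message)
    return response.status_code  # 202 means SendGrid accepted the message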