"""rag_petco_demo.ipynb |
Automatically generated by Colab. |
Original file is located at |
https://colab.research.google.com/drive/18_KRvjMD1LU1vtBRMwbi91xSzuSKOtKI |
""" |
"""## Imports""" |
import getpass
import io
import os
import re
from time import time

import numpy as np
import pandas as pd
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.chains import VectorDBQA
from langchain.document_loaders import CSVLoader
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAI
# SECURITY: an API key used to be hard-coded on this line. Any key that was
# committed to source control must be treated as leaked and revoked.
# Reuse a key already exported in the environment; otherwise prompt for one
# without echoing it, so the secret never lands in the file or its history.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')
# Load the campaign table used as the retrieval corpus.
final_df = pd.read_csv('petco_rag_df.csv')
final_df.head(2)  # notebook preview; no effect when run as a script

# NOTE(review): `vision_description` and `columns_to_drop` are not assigned
# anywhere in the visible file (the notebook cells that created them appear to
# be missing), so this line raises NameError on a fresh run — TODO restore them.
vision_description = vision_description.drop(columns_to_drop, axis=1)
vision_description.head(2)

# Keep only campaigns that appear in both tables.
merged_final_df = final_df.merge(
    vision_description,
    how='inner',
    on='CAMPAIGN_NAME',
)
merged_final_df.head(2)

# 'Unnamed: 0' is presumably a leftover index column from a CSV export — verify.
merged_final_df = merged_final_df.drop(columns='Unnamed: 0')
"""## Vectorstore Setup |
Chroma |
#### Initial setup |
you don't need to run this cell; this cell is establishing chromadb |
""" |
"""#### Load predefined chroma""" |
# Directory on the shared drive that holds the persisted Chroma collection.
persist_directory = '/content/drive/Shareddrives/RAG_SYS_PETCO/data/data/chromadb'

# Embedding function handed to Chroma for query-time embedding.
# NOTE(review): presumably the same model that built the store — confirm.
embedding = OpenAIEmbeddings(model='text-embedding-3-large')

# Open the existing collection rather than building a new one.
vectordb = Chroma(
    embedding_function=embedding,
    persist_directory=persist_directory,
)

# Plain similarity retriever returning the 50 nearest documents.
retriever = vectordb.as_retriever(
    search_kwargs=dict(k=50),
    search_type="similarity",
)
"""## MultiQueryRetriever |
""" |
import logging

from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import ChatOpenAI

# Chat model shared by the multi-query retriever and the QA chain;
# temperature 0 keeps generations as repeatable as the API allows.
llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)

# Wrap an MMR retriever (k=50) over the Chroma store with the LLM-driven
# multi-query retriever.
retriever_from_llm = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=vectordb.as_retriever(
        search_kwargs=dict(k=50),
        search_type="mmr",
    ),
)

# Raise the multi-query module's logger to INFO so its messages are shown.
logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)
# Example marketing request used to exercise the retriever.
question = """
I have promotions for May 2024 for cats and dogs. The promotion is the customers get 30% discount on the select items for cat and dog.
Can you generated an email subject and body where it would result in high CTR from customers?
"""

# Fetch the supporting documents for the request.
docs = retriever_from_llm.get_relevant_documents(query=question)

# Notebook-style inspection of the results (no effect when run as a script).
len(docs)
docs[:5]
docs[2].page_content
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Prompt that asks the model for a PETCO email subject and body from the
# retrieved context.
qa_prompt = PromptTemplate(
    # Must name exactly the placeholders used in the template. This was
    # previously 'contexts', which matched neither the `{context}` placeholder
    # nor the "context" key passed to `qa_chain.invoke`.
    input_variables=['query', 'context'],
    template="""
You are a recommendation system that analyze the user's interest and
generate an email subject and body for PETCO. If the
question cannot be answered using the information provided answer
with 'I don't know'.
Context: {context}
Question: {query},
""",
)

# Chain that formats the prompt and sends it to the shared chat model.
qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
# Run the QA chain on the current question; the retrieved documents are
# concatenated into a single context string with "---" separator lines.
out = qa_chain.invoke(
    input=dict(
        query=question,
        context="\n---\n".join(doc.page_content for doc in docs),
    )
)
print(out["text"])
# Second request: ask for a description that can be used for image generation.
question = """
I have promotion for cat and dog. Can you generate a description that can be used for image generation?
"""

# Retrieve fresh context for the new question, then answer it with the chain.
docs = retriever_from_llm.get_relevant_documents(query=question)
out = qa_chain.invoke(
    input=dict(
        query=question,
        context="\n---\n".join(doc.page_content for doc in docs),
    )
)
print(out["text"])
# Create the OpenAI SDK client before using it: `client` was referenced here
# before any assignment, so a top-to-bottom run raised NameError. The module
# is imported by its qualified name so the `OpenAI` class imported from
# `langchain_openai` at the top of the file is not shadowed at this point.
import openai

client = openai.OpenAI()

# Ask the chat model to write a DALL-E prompt for the promotional image.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[
        {"role": "system", "content": "You are an assistant for PETCO to generate description for DALL-E prompt to generate images for their promotional email. Can you generate prompt for DALL-E without explanation?"},
        {"role": "user", "content": "I need to generate images for cat and dog sales can you create prompt for image"},
    ],
)
response.choices[0].message.content  # notebook display of the generated prompt
"""## Dall-E integration""" |
from PIL import Image
import requests
from openai import OpenAI

# OpenAI SDK client; it picks up OPENAI_API_KEY from the environment.
client = OpenAI()

# Generate a single 1024x1024 standard-quality image from a fixed description.
response = client.images.generate(
    model="dall-e-3",
    prompt="""The image shows a woman brushing the teeth of a tricolor dog, likely an Australian Shepherd, in a bathroom setting. She is smiling and appears to be gently handling the dog, who is standing and looking upwards. The image also features a play button, indicating it is a clickable video link, and text below the play button stating "Watch the video (3:04)," suggesting the video is 3 minutes and 4 seconds long.""",
    size="1024x1024",
    quality="standard",
    n=1,
)
image_url = response.data[0].url

# Download the image with a timeout so a stalled connection cannot hang the
# run, fail loudly on HTTP errors instead of handing an error page to PIL, and
# decode from an in-memory buffer so no streamed connection is left open.
image_response = requests.get(image_url, timeout=60)
image_response.raise_for_status()
im = Image.open(io.BytesIO(image_response.content))
im  # notebook display of the generated image
"""# Experiments - Don't run""" |
from openai import OpenAI

# OpenAI SDK client used by the experiment helpers below.
client = OpenAI()


def generate_query():
    """Ask GPT-4 for one synthetic promotion query.

    Sends a fixed system/user message pair to the chat completions endpoint
    and returns the text content of the first choice.
    """
    system_message = {
        "role": "system",
        "content": "You are a employee from PETCO. Your job is to create a query that you can ask the recommendation system which needs information such as % of promotion range from 5~50%, month in 2024 range from may to december, and animal that range from cat, dog, leptile, birds, fish, and etc. Make sure to only generate one response without explanation.",
    }
    user_message = {
        "role": "user",
        "content": "Can you create a prompt such as 'I have promotions for May 2024 for cats and dogs. The promotion is the customers get 30% discount on the select items for cat and dog. Can you generated an email subject and body where it would result in high CTR from customers?' I Want the discount rate, animals, month, and animals be different in each prompt. Do not generate the email but only create a single prompt.",
    }
    completion = client.chat.completions.create(
        model="gpt-4",
        messages=[system_message, user_message],
    )
    return completion.choices[0].message.content
# One-off run of the same request that generate_query() sends, kept inline so
# the raw response object stays available for inspection.
response = client.chat.completions.create(
    model="gpt-4",
    messages=[
        dict(
            role="system",
            content="You are a employee from PETCO. Your job is to create a query that you can ask the recommendation system which needs information such as % of promotion range from 5~50%, month in 2024 range from may to december, and animal that range from cat, dog, leptile, birds, fish, and etc. Make sure to only generate one response without explanation.",
        ),
        dict(
            role="user",
            content="Can you create a prompt such as 'I have promotions for May 2024 for cats and dogs. The promotion is the customers get 30% discount on the select items for cat and dog. Can you generated an email subject and body where it would result in high CTR from customers?' I Want the discount rate, animals, month, and animals be different in each prompt. Do not generate the email but only create a single prompt.",
        ),
    ],
)
print(response.choices[0].message.content)

# Smoke-test the helper (result is only displayed in a notebook).
generate_query()
# Batch experiment: 149 rounds of "generate a query -> retrieve -> answer",
# printing every question and the generated email.
for iteration in range(1, 150):
    print("=================== Start ===================")
    print(f'{iteration} iteration')

    # Fresh synthetic question for this round.
    question = generate_query()
    print("Question:", question, "", sep="\n")

    # Retrieve context and run the QA chain on it.
    docs = retriever_from_llm.get_relevant_documents(query=question)
    out = qa_chain.invoke(
        input=dict(
            query=question,
            context="\n---\n".join(doc.page_content for doc in docs),
        )
    )

    print(out["text"])
    print()
    print("==================== End ====================")
    print()
"""# Planning for the Final Presentation [April 26] |
* Additional data preprocessing |
* Include image information (GPT generated description of the images in the emails) |
* Adding customer data such as purchase history or pet info(if available) |
* Add image generation pipeline |
* Feed prompt generated from the retriever into Midjourney to generate images |
* Use actual images from Petco to evaluate the quality of the generated images |
* Experiments |
* Prompt engineering with few shot examples |
* Create question sets to evaluate the robustness of the retrieval model |
* Explore hyperparameters to adjust quality of the generation |
* End-to-end pipeline (if possible) |
* Use **sendgrid** API to automatically send the email from generated subject / body / images from pipeline |
""" |