# -*- coding: utf-8 -*-
"""rag_petco_demo.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/18_KRvjMD1LU1vtBRMwbi91xSzuSKOtKI
"""

# Commented out IPython magic to ensure Python compatibility.
# %pip install -q langchain openai chromadb tiktoken langchain-openai

"""## Imports"""

import getpass
import logging
import os
import re
from time import time

import numpy as np
import pandas as pd
import requests
from PIL import Image
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import RetrievalQA
from langchain.chains import VectorDBQA
from langchain.document_loaders import CSVLoader
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAI
# NOTE: this import deliberately comes last. It rebinds `OpenAI` to the official
# SDK client, which is what every `OpenAI()` call in this script expects; the
# langchain_openai class of the same name is never used here.
from openai import OpenAI

#from google.colab import drive
#drive.mount('/content/drive')

# SECURITY: the API key used to be hard-coded on this line. Secrets must never
# live in source control -- any key that was committed here should be revoked.
# The key is now taken from the environment, prompting only when it is absent.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')

final_df = pd.read_csv('petco_rag_df.csv')
final_df.head(2)  # notebook display expression; no effect when run as a script

# NOTE(review): `vision_description` is not defined anywhere in this file, so the
# statements below raise NameError when run as a plain script. The cell that
# built it appears to be missing from the export -- load that DataFrame here
# before running this section.
columns_to_drop = ['DAY_ID','SENDS','OPENS','OPEN_RATE','CLICKS','CTR','UNSUBSCRIBE_RATE']
vision_description = vision_description.drop(columns=columns_to_drop)
vision_description.head(2)

merged_final_df = pd.merge(final_df, vision_description, on='CAMPAIGN_NAME', how='inner')
merged_final_df.head(2)

merged_final_df = merged_final_df.drop('Unnamed: 0', axis=1)

# merged_final_df.to_csv('/content/drive/Shareddrives/RAG_SYS_PETCO/data/petco_rag_img_df.csv', index=False)

"""## Vectorstore Setup Chroma

#### Initial setup
you don't need to run this cell; this cell is establishing chromadb
"""

# loader = CSVLoader("/content/drive/Shareddrives/RAG_SYS_PETCO/data/petco_rag_img_df.csv")
# documents = loader.load()
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=400)
# texts = text_splitter.split_documents(documents)
# persist_directory = '/content/drive/Shareddrives/RAG_SYS_PETCO/data/data/chromadb'
# embedding = OpenAIEmbeddings(model='text-embedding-3-large')
# vectordb = Chroma.from_documents(documents=texts, embedding=embedding, persist_directory=persist_directory)
# vectordb.persist()

"""#### Load predefined chroma"""

persist_directory = '/content/drive/Shareddrives/RAG_SYS_PETCO/data/data/chromadb'
embedding = OpenAIEmbeddings(model='text-embedding-3-large')
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)

retriever = vectordb.as_retriever(search_type="similarity",search_kwargs={"k":50})

"""## MultiQueryRetriever

"""

llm = ChatOpenAI(model_name="gpt-4-turbo",temperature=0)
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=vectordb.as_retriever(search_type="mmr", search_kwargs={"k":50}),
    llm=llm
)

# INFO level on this logger makes the retriever log the alternative queries it
# generates for each question.
logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)


def _join_page_contents(docs):
    """Concatenate the ``page_content`` of *docs* into one context string.

    Documents are separated by a ``---`` line so the LLM can tell the
    retrieved records apart.
    """
    return "\n---\n".join(d.page_content for d in docs)


question = """
I have promotions for May 2024 for cats and dogs. The promotion is the customers get 30% discount on the select items for cat and dog.
Can you generated an email subject and body where it would result in high CTR from customers?
"""

docs = retriever_from_llm.get_relevant_documents(query=question)

len(docs)

docs[:5]

docs[2].page_content

qa_prompt = PromptTemplate(
    # Fixed: this previously listed 'contexts', which matches neither the
    # `{context}` placeholder in the template nor the key passed to invoke().
    input_variables=['query','context'],
    template = """
    You are a recommendation system that analyze the user's interest and generate an email subject and body for PETCO.
    If the question cannot be answered using the information provided answer with 'I don't know'.

    Context: {context}

    Question: {query},
    """,
)

qa_chain = LLMChain(llm=llm, prompt=qa_prompt)

out = qa_chain.invoke(
    input={
        "query": question,
        "context": _join_page_contents(docs)
    }
)
print(out["text"])

question = """
I have promotion for cat and dog. Can you generate a description that can be used for image generation?
"""

docs = retriever_from_llm.get_relevant_documents(query=question)
out = qa_chain.invoke(
    input={
        "query": question,
        "context": _join_page_contents(docs)
    }
)
print(out["text"])

# Fixed: the client used to be created only in the Dall-E section further down,
# after this first use, which raised NameError. It is created once here and
# reused by every later call.
client = OpenAI()

response = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[
        {"role": "system", "content": "You are an assistant for PETCO to generate description for DALL-E prompt to generate images for their promotional email. Can you generate prompt for DALL-E without explanation?"},
        {"role": "user", "content": "I need to generate images for cat and dog sales can you create prompt for image"}
    ]
)

response.choices[0].message.content

"""## Dall-E integration"""

response = client.images.generate(
    model="dall-e-3",
    prompt=(
        'The image shows a woman brushing the teeth of a tricolor dog, likely an Australian Shepherd, in a bathroom setting. '
        'She is smiling and appears to be gently handling the dog, who is standing and looking upwards. '
        'The image also features a play button, indicating it is a clickable video link, and text below the play button '
        'stating "Watch the video (3:04)," suggesting the video is 3 minutes and 4 seconds long.'
    ),
    size="1024x1024",
    quality="standard",
    n=1,
)

image_url = response.data[0].url

# A timeout keeps a stalled download from hanging the script forever, and
# raise_for_status() surfaces HTTP errors instead of handing an error page to PIL.
image_response = requests.get(image_url, stream=True, timeout=30)
image_response.raise_for_status()
im = Image.open(image_response.raw)

im

"""# Experiments - Don't run"""

# The heading above says not to run this section, but as a plain script it
# would execute unconditionally and issue ~150 paid API calls. It is therefore
# opt-in: flip this flag to run the experiment loop.
RUN_EXPERIMENTS = False
NUM_EXPERIMENT_QUERIES = 149

# Prompts for the synthetic-question generator (kept verbatim).
QUERY_GENERATOR_SYSTEM_PROMPT = (
    "You are a employee from PETCO. "
    "Your job is to create a query that you can ask the recommendation system which needs information such as "
    "% of promotion range from 5~50%, month in 2024 range from may to december, "
    "and animal that range from cat, dog, leptile, birds, fish, and etc. "
    "Make sure to only generate one response without explanation."
)
QUERY_GENERATOR_USER_PROMPT = (
    "Can you create a prompt such as "
    "'I have promotions for May 2024 for cats and dogs. "
    "The promotion is the customers get 30% discount on the select items for cat and dog. "
    "Can you generated an email subject and body where it would result in high CTR from customers?' "
    "I Want the discount rate, animals, month, and animals be different in each prompt. "
    "Do not generate the email but only create a single prompt."
)


def generate_query():
    """Ask GPT-4 for one synthetic promotion question.

    Sends the fixed system/user prompt pair above through the module-level
    ``client`` and returns the text of the first completion choice.

    Returns:
        The generated question, as returned by the chat completion API.
    """
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": QUERY_GENERATOR_SYSTEM_PROMPT},
            {"role": "user", "content": QUERY_GENERATOR_USER_PROMPT}
        ]
    )
    return response.choices[0].message.content


if RUN_EXPERIMENTS:
    # Smoke-test the generator once. This replaces a verbatim top-level copy of
    # generate_query()'s request plus a second call whose result was discarded.
    print(generate_query())

    for i in range(NUM_EXPERIMENT_QUERIES):
        print("=================== Start ===================")
        print(f'{i+1} iteration')
        question = generate_query()
        print("Question:")
        print(question)
        print()
        docs = retriever_from_llm.get_relevant_documents(query=question)
        out = qa_chain.invoke(
            input={
                "query": question,
                "context": _join_page_contents(docs)
            }
        )
        print(out["text"])
        print()
        print("==================== End ====================")
        print()

"""# Planning for the Final Presentation [April 26]

* Additional data preprocessing
  * Include image information (GPT generated description of the images in the emails)
  * Adding customer data such as purchase history or pet info(if available)
* Add image generation pipeline
  * Feed prompt generated from the retriever into Midjourney to generate images
  * Use actual images from Petco to evaluate the quality of the generated images
* Experiments
  * Prompt engineering with few shot examples
  * Create question sets to evaluate the robustness of the retrieval model
  * Explore hyperparameters to adjust quality of the generation
* End-to-end pipeline (if possible)
  * Use **sendgrid** API to automatically send the email from generated subject / body / images from pipeline
"""