# -*- coding: utf-8 -*-
"""rag_petco_demo.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/18_KRvjMD1LU1vtBRMwbi91xSzuSKOtKI
"""
# Commented out IPython magic to ensure Python compatibility.
# %pip install -q langchain langchain-community langchain-openai openai chromadb tiktoken
"""## Imports"""
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.chains import VectorDBQA
from langchain import PromptTemplate
from langchain_openai import OpenAI
from time import time
import pandas as pd
import numpy as np
import getpass
import re
import os
#from google.colab import drive
#drive.mount('/content/drive')
if 'OPENAI_API_KEY' not in os.environ:
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')
final_df = pd.read_csv('petco_rag_df.csv')
final_df.head(2)
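# NOTE: `vision_description` below is assumed to be a DataFrame of GPT-generated image
# descriptions keyed by CAMPAIGN_NAME, loaded in an earlier cell that is not included here.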
columns_to_drop = ['DAY_ID','SENDS','OPENS','OPEN_RATE','CLICKS','CTR','UNSUBSCRIBE_RATE']
vision_description = vision_description.drop(columns=columns_to_drop)
vision_description.head(2)
merged_final_df = pd.merge(final_df, vision_description, on='CAMPAIGN_NAME', how='inner')
merged_final_df.head(2)
merged_final_df = merged_final_df.drop('Unnamed: 0', axis=1)
# merged_final_df.to_csv('/content/drive/Shareddrives/RAG_SYS_PETCO/data/petco_rag_img_df.csv', index=False)
"""## Vectorstore Setup
Chroma
#### Initial setup
You don't need to run this cell; it builds the Chroma vectorstore from the CSV.
"""
# loader = CSVLoader("/content/drive/Shareddrives/RAG_SYS_PETCO/data/petco_rag_img_df.csv")
# documents = loader.load()
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=400)
# texts = text_splitter.split_documents(documents)
# persist_directory = '/content/drive/Shareddrives/RAG_SYS_PETCO/data/data/chromadb'
# embedding = OpenAIEmbeddings(model='text-embedding-3-large')
# vectordb = Chroma.from_documents(documents=texts, embedding=embedding, persist_directory=persist_directory)
# vectordb.persist()
"""#### Load predefined chroma"""
persist_directory = '/content/drive/Shareddrives/RAG_SYS_PETCO/data/data/chromadb'
embedding = OpenAIEmbeddings(model='text-embedding-3-large')
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
retriever = vectordb.as_retriever(search_type="similarity",search_kwargs={"k":50})
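# Optional sanity check on the plain similarity retriever (the query string is just an example):
# retriever.get_relevant_documents("30% off dog and cat items promotion emails")[:2]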
"""## MultiQueryRetriever
"""
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model_name="gpt-4-turbo",temperature=0)
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 50}),
    llm=llm,
)
import logging
logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)
question = """
I have promotions for May 2024 for cats and dogs. The promotion is the customers get 30% discount on the select items for cat and dog.
Can you generated an email subject and body where it would result in high CTR from customers?
"""
docs = retriever_from_llm.get_relevant_documents(query=question)
len(docs)
docs[:5]
docs[2].page_content
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
qa_prompt = PromptTemplate(
    input_variables=['query', 'context'],
    template="""
You are a recommendation system that analyzes the user's interests and
generates an email subject and body for PETCO. If the question cannot be
answered using the information provided, answer with 'I don't know'.
Context: {context}
Question: {query}
""",
)
qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
out = qa_chain.invoke(
    input={
        "query": question,
        "context": "\n---\n".join([d.page_content for d in docs])
    }
)
print(out["text"])
question = """
I have promotion for cat and dog. Can you generate a description that can be used for image generation?
"""
docs = retriever_from_llm.get_relevant_documents(query=question)
out = qa_chain.invoke(
    input={
        "query": question,
        "context": "\n---\n".join([d.page_content for d in docs])
    }
)
print(out["text"])
# The OpenAI client is created here because this cell runs before the DALL-E section below.
from openai import OpenAI
client = OpenAI()
response = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[
        {"role": "system", "content": "You are an assistant for PETCO that generates descriptions to use as DALL-E prompts for images in promotional emails. Generate the prompt without explanation."},
        {"role": "user", "content": "I need to generate images for a cat and dog sale. Can you create a prompt for the image?"}
    ]
)
response.choices[0].message.content
"""## Dall-E integration"""
from PIL import Image
import requests
from openai import OpenAI
client = OpenAI()
response = client.images.generate(
    model="dall-e-3",
    prompt="""The image shows a woman brushing the teeth of a tricolor dog, likely an Australian Shepherd, in a bathroom setting. She is smiling and appears to be gently handling the dog, who is standing and looking upwards. The image also features a play button, indicating it is a clickable video link, and text below the play button stating "Watch the video (3:04)," suggesting the video is 3 minutes and 4 seconds long.""",
    size="1024x1024",
    quality="standard",
    n=1,
)
image_url = response.data[0].url
im = Image.open(requests.get(image_url, stream=True).raw)
im
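# Optionally save the generated image locally for the email pipeline (the filename is illustrative):
# im.save("petco_promo_image.png")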
"""# Experiments - Don't run"""
from openai import OpenAI
client = OpenAI()
def generate_query():
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are an employee at PETCO. Your job is to create a query to ask the recommendation system. The query needs a promotion percentage between 5% and 50%, a month in 2024 between May and December, and an animal such as a cat, dog, reptile, bird, or fish. Generate only one response, without explanation."},
            {"role": "user", "content": "Can you create a prompt such as 'I have promotions for May 2024 for cats and dogs. The promotion gives customers a 30% discount on select cat and dog items. Can you generate an email subject and body that would result in a high CTR from customers?' I want the discount rate, animals, and month to be different in each prompt. Do not generate the email; only create a single prompt."}
        ]
    )
    return response.choices[0].message.content
response = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are an employee at PETCO. Your job is to create a query to ask the recommendation system. The query needs a promotion percentage between 5% and 50%, a month in 2024 between May and December, and an animal such as a cat, dog, reptile, bird, or fish. Generate only one response, without explanation."},
        {"role": "user", "content": "Can you create a prompt such as 'I have promotions for May 2024 for cats and dogs. The promotion gives customers a 30% discount on select cat and dog items. Can you generate an email subject and body that would result in a high CTR from customers?' I want the discount rate, animals, and month to be different in each prompt. Do not generate the email; only create a single prompt."}
    ]
)
print(response.choices[0].message.content)
generate_query()
for i in range(0, 149):
    print("=================== Start ===================")
    print(f'{i+1} iteration')
    question = generate_query()
    print("Question:")
    print(question)
    print()
    docs = retriever_from_llm.get_relevant_documents(query=question)
    out = qa_chain.invoke(
        input={
            "query": question,
            "context": "\n---\n".join([d.page_content for d in docs])
        }
    )
    print(out["text"])
    print()
    print("==================== End ====================")
    print()
"""# Planning for the Final Presentation [April 26]
* Additional data preprocessing
* Include image information (GPT generated description of the images in the emails)
* Adding customer data such as purchase history or pet info(if available)
* Add image generation pipeline
* Feed prompt generated from the retriever into Midjourney to generate images
* Use actual images from Petco to evaluate the quality of the generated images
* Experiments
* Prompt engineering with few shot examples
* Create question sets to evaluate the robustness of the retrieval model
* Explore hyperparameters to adjust quality of the generation
* End-to-end pipeline (if possible)
* Use **sendgrid** API to automatically send the email from generated subject / body / images from pipeline
"""