# -*- coding: utf-8 -*-
"""rag_petco_demo.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/18_KRvjMD1LU1vtBRMwbi91xSzuSKOtKI
"""

# Commented out IPython magic to ensure Python compatibility.
# %pip install -q langchain langchain-openai langchain-community openai chromadb tiktoken gradio

"""## Imports"""

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import CSVLoader
from langchain_openai import OpenAIEmbeddings

import pandas as pd
import getpass
import os

import gradio as gr

#from google.colab import drive
#drive.mount('/content/drive')

# Prompt for the OpenAI API key instead of hard-coding a secret in the notebook.
if 'OPENAI_API_KEY' not in os.environ:
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')

final_df = pd.read_csv('petco_rag_df.csv')
# merged_final_df.to_csv('/content/drive/Shareddrives/RAG_SYS_PETCO/data/petco_rag_img_df.csv', index=False)

"""## Vectorstore Setup Chroma

#### Initial setup (you don't need to run this cell; it builds the Chroma index from the CSV)
"""

# loader = CSVLoader("/content/drive/Shareddrives/RAG_SYS_PETCO/data/petco_rag_img_df.csv")
# documents = loader.load()

# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=400)
# texts = text_splitter.split_documents(documents)

# persist_directory = '/content/drive/Shareddrives/RAG_SYS_PETCO/data/data/chromadb'
# embedding = OpenAIEmbeddings(model='text-embedding-3-large')

# vectordb = Chroma.from_documents(documents=texts, embedding=embedding, persist_directory=persist_directory)
# vectordb.persist()

"""#### Load predefined chroma"""

# Path to the already-persisted Chroma index.
persist_directory = '/chromadb'
embedding = OpenAIEmbeddings(model='text-embedding-3-large')
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)

retriever = vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 50})
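"""#### Optional: quick retrieval sanity check

A minimal sketch (not part of the original notebook) to confirm the persisted Chroma index loaded correctly.
The sample query string is hypothetical; any short promotion-related text works.
"""

# sample_query = "dog food promotion"                    # hypothetical example query
# hits = vectordb.similarity_search(sample_query, k=3)   # top-3 most similar chunks
# for doc in hits:
#     print(doc.page_content[:200], "\n---")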
"""## MultiQueryRetriever"""

from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)

retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 50}),
    llm=llm
)

import logging

logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

# question = """
# I have promotions for May 2024 for cats and dogs. The promotion is that customers get a 30% discount on select cat and dog items.
# Can you generate an email subject and body that would result in a high CTR from customers?
# """

# docs = retriever_from_llm.get_relevant_documents(query=question)

from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

qa_prompt = PromptTemplate(
    input_variables=['query', 'context'],
    template="""
    You are a recommendation system that analyzes the user's interests and generates an email subject and body for PETCO.
    If the question cannot be answered using the information provided, answer with 'I don't know'.

    Context: {context}

    Question: {query}
    """,
)

qa_chain = LLMChain(llm=llm, prompt=qa_prompt)

# out = qa_chain.invoke(
#     input={
#         "query": question,
#         "context": "\n---\n".join([d.page_content for d in docs])
#     }
# )

# print(out["text"])

# question = """
# I have a promotion for cats and dogs. Can you generate a description that can be used for image generation?
# """

# docs = retriever_from_llm.get_relevant_documents(query=question)

# out = qa_chain.invoke(
#     input={
#         "query": question,
#         "context": "\n---\n".join([d.page_content for d in docs])
#     }
# )

# print(out["text"])

def call_rag(question):
    # Expand the question into multiple queries, retrieve supporting documents,
    # and generate the email subject/body from the retrieved context.
    docs = retriever_from_llm.get_relevant_documents(query=question)
    out = qa_chain.invoke(
        input={
            "query": question,
            "context": "\n---\n".join([d.page_content for d in docs])
        }
    )
    return out["text"]

"""# Planning for the Final Presentation [April 26]

* Additional data preprocessing
  * Include image information (GPT-generated descriptions of the images in the emails)
  * Add customer data such as purchase history or pet info (if available)
* Add an image generation pipeline
  * Feed prompts generated from the retriever into Midjourney to generate images
  * Use actual images from Petco to evaluate the quality of the generated images
* Experiments
  * Prompt engineering with few-shot examples
  * Create question sets to evaluate the robustness of the retrieval model
  * Explore hyperparameters to adjust the quality of the generation
* End-to-end pipeline (if possible)
  * Use the **sendgrid** API to automatically send emails with the subject / body / images generated by the pipeline (see the sketch after the Gradio demo below)
"""

interface = gr.Interface(
    fn=call_rag,
    inputs="text",
    outputs="text",
    title="Email Generation",
    description="Enter a prompt and get an example email generated.",
)

interface.launch()
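"""#### Sketch: sending the generated email with SendGrid

A minimal sketch of the planned end-to-end step, not a finished implementation. It assumes a
`SENDGRID_API_KEY` environment variable and uses placeholder sender/recipient addresses; splitting
the chain output into a separate subject and body is also an assumption, since `call_rag` currently
returns a single text blob.
"""

# from sendgrid import SendGridAPIClient
# from sendgrid.helpers.mail import Mail
#
# def send_generated_email(subject, body, to_email="customer@example.com"):
#     """Send one generated email through the SendGrid API (sketch)."""
#     message = Mail(
#         from_email="promotions@example.com",   # placeholder sender
#         to_emails=to_email,                    # placeholder recipient
#         subject=subject,
#         html_content=body,
#     )
#     sg = SendGridAPIClient(os.environ["SENDGRID_API_KEY"])
#     response = sg.send(message)
#     return response.status_code                # 202 means the message was accepted for delivery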