palexis3 committed on
Commit
9914661
1 Parent(s): 39fecc7

Implemented RAG to respond to user queries

Files changed (1)
  1. app/service/transactions_query_rag.py +128 -0
app/service/transactions_query_rag.py CHANGED
@@ -0,0 +1,128 @@
+ from pinecone import Pinecone, ServerlessSpec
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+ from langchain_pinecone import PineconeVectorStore
+ from langchain.chains import RetrievalQA
+
+ from fastapi import HTTPException
+
+ import os
+ import time
+ import pandas as pd
+ from uuid import uuid4
+
+ async def answer_query(df: pd.DataFrame, query: str) -> str:
+     """Creates an embedding of the transactions table and then returns the answer for the given query.
+
+     Args:
+         df (pd.DataFrame): DataFrame containing the transactions that a user has entered
+         query (str): The query the user will ask against said embedding
+
+     Returns:
+         str: Response to the query
+     """
+     try:
+         batch_limit = 100
+
+         pinecone_api_key = os.environ['PINECONE_API_KEY']
+         openai_api_key = os.environ['OPENAI_API_KEY']
+         namespace = "transactionsvector"
+
+         pc = Pinecone(api_key=pinecone_api_key)
+
+         # text-embedding-3-small produces 1536-dimensional vectors, matching
+         # the dimension of the index created below.
+         embeddings = OpenAIEmbeddings(
+             model="text-embedding-3-small",
+             openai_api_key=openai_api_key
+         )
+
+         # Pinecone index names may only contain lowercase letters, digits,
+         # and hyphens, so "transactions_rag" would be rejected.
+         index_name = "transactions-rag"
+
+         # Rebuild the index from scratch on every call.
+         if index_name in pc.list_indexes().names():
+             pc.delete_index(index_name)
+
+         pc.create_index(
+             name=index_name,
+             dimension=1536,
+             metric="cosine",
+             spec=ServerlessSpec(
+                 cloud="aws",
+                 region="us-east-1"
+             )
+         )
+
+         # Wait until the new serverless index is ready before upserting.
+         while not pc.describe_index(index_name).status["ready"]:
+             time.sleep(1)
+
+         index = pc.Index(index_name)
+
+         texts = []
+         all_texts = []
+         metadatas = []
+
+         text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=1000,
+             chunk_overlap=100
+         )
+
+         for _, record in df.iterrows():
+             content_texts = text_splitter.split_text(record['content'])
+
+             metadata = {
+                 'user_id': str(record['user_id'])
+             }
+             # "text" is the metadata key PineconeVectorStore reads chunk text from.
+             content_metadata = [{
+                 "chunk": j, "text": text, **metadata
+             } for j, text in enumerate(content_texts)]
+
+             texts.extend(content_texts)
+             all_texts.extend(content_texts)
+             metadatas.extend(content_metadata)
+
+             # If we have reached the batch limit, then add the texts and reset
+             if len(texts) >= batch_limit:
+                 ids = [str(uuid4()) for _ in range(len(texts))]
+                 embeds = embeddings.embed_documents(texts)
+                 index.upsert(vectors=list(zip(ids, embeds, metadatas)))
+                 texts = []
+                 metadatas = []
+
+         # Flush any remaining chunks that did not fill a full batch.
+         if len(texts) > 0:
+             ids = [str(uuid4()) for _ in range(len(texts))]
+             embeds = embeddings.embed_documents(texts)
+             index.upsert(vectors=list(zip(ids, embeds, metadatas)))
+
+         # Build the retrieval-side vector store. from_texts re-embeds the
+         # chunks and upserts them under the "transactionsvector" namespace.
+         transactions_search = PineconeVectorStore.from_texts(
+             texts=all_texts,
+             index_name=index_name,
+             embedding=embeddings,
+             namespace=namespace
+         )
+
+         llm = ChatOpenAI(
+             openai_api_key=openai_api_key,
+             model_name="gpt-3.5-turbo",
+             temperature=0.0
+         )
+
+         qa = RetrievalQA.from_llm(
+             llm=llm,
+             retriever=transactions_search.as_retriever()
+         )
+
+         # RetrievalQA returns a dict; the generated answer is under "result".
+         answer = qa.invoke(query)
+
+         return answer["result"]
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"answer_query error: {str(e)}")
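
For reference, a minimal sketch of how answer_query might be called from a FastAPI route. The route path, the QueryRequest model, and the load_transactions helper are assumptions for illustration only; they are not part of this commit.

import pandas as pd
from fastapi import FastAPI
from pydantic import BaseModel

from app.service.transactions_query_rag import answer_query

app = FastAPI()

class QueryRequest(BaseModel):
    user_id: str
    query: str

def load_transactions(user_id: str) -> pd.DataFrame:
    # Hypothetical stand-in for however the app actually loads a user's
    # transactions; answer_query expects 'content' and 'user_id' columns.
    return pd.DataFrame([
        {"user_id": user_id, "content": "2024-01-05 Coffee Shop $4.50"},
        {"user_id": user_id, "content": "2024-01-06 Grocery Store $62.10"},
    ])

@app.post("/transactions/query")
async def query_transactions(request: QueryRequest) -> dict:
    df = load_transactions(request.user_id)
    answer = await answer_query(df, request.query)
    return {"answer": answer}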