jet-taekyo commited on
Commit
dfa619a
Β·
0 Parent(s):

initial commit

Browse files
Files changed (7) hide show
  1. .dockerignore +5 -0
  2. .gitignore +5 -0
  3. Dockerfile +14 -0
  4. app.py +175 -0
  5. chainlit.md +14 -0
  6. compose.yml +16 -0
  7. requirements.txt +8 -0
.dockerignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ __pycache__/
2
+ .chainlit/
3
+ E2E_CACHE/
4
+ VECTOR_STORE_CACHE/
5
+ .env
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ __pycache__/
2
+ .chainlit/
3
+ E2E_CACHE/
4
+ VECTOR_STORE_CACHE/
5
+ .env
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11

# Run as an unprivileged user (uid 1000 matches HF Spaces convention).
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME/app

# Copy requirements first so the dependency layer is cached independently of
# code changes. The original `COPY ./requirements.txt ~/app/...` relied on `~`,
# which Docker COPY does NOT expand — it created a literal `~` path.
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code once, owned by the non-root user (the original
# copied the full context twice, the second time root-owned).
COPY --chown=user . $HOME/app

# Pre-create the on-disk cache directories used by app.py.
RUN mkdir -p $HOME/app/VECTOR_STORE_CACHE $HOME/app/E2E_CACHE

EXPOSE 7860
CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Import Section ###
2
+ import os
3
+ import re
4
+ import chainlit as cl
5
+ from langchain.storage import LocalFileStore
6
+ from operator import itemgetter
7
+ from langchain_core.runnables import RunnablePassthrough, RunnableLambda, Runnable, RunnableParallel
8
+ from langchain_core.prompts import ChatPromptTemplate
9
+ from langchain_openai import ChatOpenAI
10
+ from chainlit.types import AskFileResponse
11
+ from langchain_community.document_loaders import PyMuPDFLoader
12
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
13
+ from langchain_openai.embeddings import OpenAIEmbeddings
14
+ from qdrant_client import QdrantClient
15
+ from qdrant_client.models import VectorParams, Distance
16
+ from langchain.embeddings import CacheBackedEmbeddings
17
+ from langchain_qdrant import QdrantVectorStore
18
+ from langchain.schema import StrOutputParser
19
+ from langchain_core.documents import Document
20
+ from typing import cast
21
+ from dotenv import load_dotenv
22
+
23
### Environment Variables ###
# Load OPENAI_API_KEY etc. from the local .env file.
load_dotenv('.env')

### Global Section ###
# On-disk byte stores shared by all chat sessions:
#   VECTOR_STORE_CACHE — cached embedding vectors (used by CacheBackedEmbeddings)
#   E2E_CACHE          — cached end-to-end question -> answer responses
VECTOR_STORE_CACHE = LocalFileStore(root_path = "VECTOR_STORE_CACHE")
E2E_CACHE = LocalFileStore(root_path = "E2E_CACHE")
29
+
30
+ #πŸ˜‰ helper functions
31
def clean_text(text: str) -> str:
    """Strip every character that is not an ASCII letter or digit.

    Used to turn free-form question text into a stable cache key.
    """
    return "".join(re.findall(r"[a-zA-Z0-9]", text))
33
+
34
def caching_rag_respnse(question: str, answer: str):
    """Store *answer* in the end-to-end cache, keyed by the cleaned question text."""
    cache_key = clean_text(question)
    E2E_CACHE.mset([(cache_key, answer.encode('utf-8'))])
36
+
37
def load_cached_response(input):
    """Look up a previously cached answer for input['question'].

    Returns the decoded answer string on a cache hit, otherwise False.
    """
    cache_key = clean_text(input['question'])
    hit = E2E_CACHE.mget([cache_key])[0]
    if hit:
        return hit.decode('utf-8')
    return False
41
+
42
+
43
+ #πŸ˜‰ prompt
44
# Prompt sent to the LLM; the chain fills in {question} and {context}.
# Fix: the instruction text misspelled "contain" as "coantain".
RAG_SYSTEM_MSG_TEMPLATE = """\
You are a helpful assistant that uses the provided context to answer questions. If Context does not contain any information to answer Question, just say "I don't know".

Question:
{question}
Context:
{context}
"""
# Single human-turn chat prompt built from the template above.
RAG_PROMPT = ChatPromptTemplate([('human', RAG_SYSTEM_MSG_TEMPLATE)])
53
+
54
+
55
+ #πŸ˜‰ retriever
56
async def get_retriever(filename: str, chunks: list[Document]):
    """Build an MMR retriever over *chunks*, with embedding calls cached on disk.

    Args:
        filename: stem of the uploaded PDF, used to name the Qdrant collection.
        chunks: pre-split document chunks to index.

    Returns:
        (retriever, already_exist) — already_exist tells the caller whether the
        collection was found instead of freshly populated.
    """
    # NOTE(review): a fresh in-memory client is created on every call, so the
    # collection can never pre-exist and the else-branch below is effectively
    # dead; persist `client` at module level if collection reuse is intended.
    client = QdrantClient(":memory:")

    core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    # Cache raw embedding vectors on disk so re-embedding identical text
    # skips the OpenAI API call.
    cached_embedder = CacheBackedEmbeddings.from_bytes_store(
        underlying_embeddings = core_embeddings,
        document_embedding_cache = VECTOR_STORE_CACHE,
        namespace=core_embeddings.model
    )

    # Fix: the original used an f-string with no placeholder
    # (f"pdf_to_parse_(unknown)"), so `filename` was never used.
    collection_name = f"pdf_to_parse_{filename}"
    if collection_name not in (x.name for x in client.get_collections().collections):
        client.create_collection(
            collection_name=collection_name,
            # 1536 is text-embedding-3-small's output dimension.
            vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
        )
        vectorstore = QdrantVectorStore(
            client=client,
            collection_name=collection_name,
            embedding=cached_embedder
        )
        vectorstore.add_documents(chunks)
        already_exist = False
    else:
        vectorstore = QdrantVectorStore(
            client=client,
            collection_name=collection_name,
            embedding=cached_embedder
        )
        already_exist = True
    retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})
    return retriever, already_exist
89
+
90
+
91
def get_rag(retriever):
    """Assemble the streaming RAG chain: retrieve -> prompt -> LLM -> plain text."""
    chat_model = ChatOpenAI(model="gpt-4o-mini", streaming=True)
    # Fan the incoming question out to the retriever (context) while passing
    # it through unchanged as the question itself.
    gather_inputs = RunnableParallel(
        context = retriever,
        question = RunnablePassthrough()
    )
    rag_chain = (gather_inputs | RAG_PROMPT | chat_model | StrOutputParser()).with_config(
        {'run_name': 'RAG'}
    )
    return rag_chain
100
+
101
+
102
+
103
+
104
+
105
### On Chat Start (Session Start) Section ###
@cl.on_chat_start
async def on_chat_start():
    """Start a session: ingest the uploaded PDF and stash its RAG chain."""
    files = None

    # Wait for the user to upload a file (fix: `is None`, not `== None`;
    # message also had a duplicated word "Pdf File file").
    while files is None:
        files = await cl.AskFileMessage(
            content="Hello!! I'm Jet! Please upload a PDF file to begin!",
            accept=["application/pdf"],
            max_size_mb=10,
            timeout=180,
        ).send()

    file = files[0]
    msg = cl.Message(content=f"Processing `{file.name}`...")
    await msg.send()

    # Load the PDF and split it into overlapping chunks for retrieval.
    documents = PyMuPDFLoader(file.path).load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = await text_splitter.atransform_documents(documents)

    # get rag chain — os.path.splitext gives the clean stem; the original
    # `file.name.split('pdf')[0]` broke on names containing "pdf" and left a
    # trailing dot.
    retriever, already_exist = await get_retriever(os.path.splitext(file.name)[0], chunks)
    rag_chain = get_rag(retriever)

    # Let the user know that the system is ready
    if not already_exist:
        msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    else:
        msg.content = "VectorStore already exists. You can now ask questions!"
    await msg.update()

    # Stash the chain for the on_message handler of this session.
    cl.user_session.set("chain", rag_chain)
142
+
143
+
144
+
145
### Rename Chains ###
@cl.author_rename
def rename(orig_author: str):
    """Map internal author labels to display names ("Assistant" -> "Jet")."""
    return "Jet" if orig_author == "Assistant" else orig_author
151
+
152
+
153
### On Message Section ###
@cl.on_message
async def main(message):
    """Answer the user's message, serving from the response cache when possible."""
    cached_answer = load_cached_response({'question': message.content})

    if cached_answer:
        # Cache hit: reply immediately without invoking the chain.
        await cl.Message(content=cached_answer).send()
        return

    # Cache miss: stream the RAG chain's answer token by token.
    chain = cast(Runnable, cl.user_session.get("chain"))
    msg = cl.Message(content="")
    async for token in chain.astream(message.content):
        await msg.stream_token(token)

    # Remember the full answer for future identical questions.
    caching_rag_respnse(question=message.content, answer=msg.content)

    await msg.send()
174
+
175
+
chainlit.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Welcome to Chainlit! πŸš€πŸ€–
2
+
3
+ Hi there, Developer! πŸ‘‹ We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
4
+
5
+ ## Useful Links πŸ”—
6
+
7
+ - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) πŸ“š
8
+ - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! πŸ’¬
9
+
10
+ We can't wait to see what you create with Chainlit! Happy coding! πŸ’»πŸ˜Š
11
+
12
+ ## Welcome screen
13
+
14
+ To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
compose.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
version: '3.8' # Docker Compose file version (NOTE: `version` is obsolete and ignored by modern Compose)

services:
  webapp:
    image: week8:latest # The name of your Docker image
    container_name: test # Name of the container
    ports:
      - "7860:7860" # Map port 7860 on the host to port 7860 in the container
    volumes:
      # Named volumes persist the embedding and response caches across restarts.
      # NOTE(review): fresh named volumes are root-owned by default — confirm
      # the image's non-root `user` can write to these mount points.
      - vector_store_cache:/home/user/app/VECTOR_STORE_CACHE
      - e2e_cache:/home/user/app/E2E_CACHE
    command: chainlit run app.py --host 0.0.0.0 --port 7860

volumes:
  vector_store_cache:
  e2e_cache:
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ chainlit==1.2.0
2
+ langchain==0.3.1
3
+ langchain-openai==0.2.1
4
+ langchain-community==0.3.1
5
+ qdrant-client==1.11.3
6
+ langchain-qdrant==0.1.4
7
+ PyMuPDF==1.24.10
8
+ python-dotenv==1.0.1