Spaces:
Build error
Build error
Commit
Β·
dfa619a
0
Parent(s):
initial commit
Browse files- .dockerignore +5 -0
- .gitignore +5 -0
- Dockerfile +14 -0
- app.py +175 -0
- chainlit.md +14 -0
- compose.yml +16 -0
- requirements.txt +8 -0
.dockerignore
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
__pycache__/
|
2 |
+
.chainlit/
|
3 |
+
E2E_CACHE/
|
4 |
+
VECTOR_STORE_CACHE/
|
5 |
+
.env
|
.gitignore
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
__pycache__/
|
2 |
+
.chainlit/
|
3 |
+
E2E_CACHE/
|
4 |
+
VECTOR_STORE_CACHE/
|
5 |
+
.env
|
Dockerfile
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.11

# Run as a non-root user (required by HF Spaces; uid 1000 matches the platform).
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME/app

# BUG FIX: the original `COPY ./requirements.txt ~/app/requirements.txt` created a
# literal "~" directory (Docker COPY does not expand tildes) and was root-owned.
# Copy requirements first so the pip layer is cached across source-only changes.
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
RUN pip install -r requirements.txt

# Copy the application source once, owned by the runtime user
# (the original copied the tree twice, the second time root-owned).
COPY --chown=user . $HOME/app

# Writable cache directories used by app.py's LocalFileStore instances.
RUN mkdir -p $HOME/app/VECTOR_STORE_CACHE $HOME/app/E2E_CACHE

EXPOSE 7860
CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
app.py
ADDED
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### Import Section ###
|
2 |
+
import os
|
3 |
+
import re
|
4 |
+
import chainlit as cl
|
5 |
+
from langchain.storage import LocalFileStore
|
6 |
+
from operator import itemgetter
|
7 |
+
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, Runnable, RunnableParallel
|
8 |
+
from langchain_core.prompts import ChatPromptTemplate
|
9 |
+
from langchain_openai import ChatOpenAI
|
10 |
+
from chainlit.types import AskFileResponse
|
11 |
+
from langchain_community.document_loaders import PyMuPDFLoader
|
12 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
13 |
+
from langchain_openai.embeddings import OpenAIEmbeddings
|
14 |
+
from qdrant_client import QdrantClient
|
15 |
+
from qdrant_client.models import VectorParams, Distance
|
16 |
+
from langchain.embeddings import CacheBackedEmbeddings
|
17 |
+
from langchain_qdrant import QdrantVectorStore
|
18 |
+
from langchain.schema import StrOutputParser
|
19 |
+
from langchain_core.documents import Document
|
20 |
+
from typing import cast
|
21 |
+
from dotenv import load_dotenv
|
22 |
+
|
23 |
+
### Environment Variables ###
# Load secrets (e.g. OPENAI_API_KEY) from the local .env file.
load_dotenv('.env')

### Global Section ###
# Persistent on-disk key/value stores:
# - VECTOR_STORE_CACHE backs CacheBackedEmbeddings so document chunks are
#   only embedded once across sessions.
# - E2E_CACHE maps a cleaned question string -> full answer bytes, letting
#   repeated questions skip the LLM entirely (see load_cached_response).
VECTOR_STORE_CACHE = LocalFileStore(root_path = "VECTOR_STORE_CACHE")
E2E_CACHE = LocalFileStore(root_path = "E2E_CACHE")
29 |
+
|
30 |
+
#π helper functions
|
31 |
+
def clean_text(text: str) -> str:
    """Return *text* with everything except ASCII letters and digits removed.

    Used to normalise questions into stable keys for the E2E response cache.
    """
    return ''.join(re.findall(r'[a-zA-Z0-9]', text))
33 |
+
|
34 |
+
def caching_rag_respnse(question: str, answer: str):
    """Persist a finished RAG answer in the end-to-end cache.

    The key is the cleaned question; the value is the UTF-8 encoded answer.
    NOTE(review): name typo ("respnse") kept — callers use it as-is.
    """
    cache_key = clean_text(question)
    E2E_CACHE.mset([(cache_key, answer.encode('utf-8'))])
36 |
+
|
37 |
+
def load_cached_response(input):
    """Look up a previously cached answer for ``input['question']``.

    Returns the decoded answer string on a cache hit, ``False`` on a miss
    (callers branch on truthiness).
    """
    cache_key = clean_text(input['question'])
    hit = E2E_CACHE.mget([cache_key])[0]
    if not hit:
        return False
    return hit.decode('utf-8')
41 |
+
|
42 |
+
|
43 |
+
#π prompt
# Human-message template for the RAG chain; the chain fills in {question}
# (the user's query) and {context} (retrieved document chunks).
# BUG FIX: corrected the typo "coantain" -> "contain" in the instruction text.
RAG_SYSTEM_MSG_TEMPLATE = """\
You are a helpful assistant that uses the provided context to answer questions. If Context does not contain any information to answer Question, just say "I don't know".

Question:
{question}
Context:
{context}
"""
RAG_PROMPT = ChatPromptTemplate([('human', RAG_SYSTEM_MSG_TEMPLATE)])
53 |
+
|
54 |
+
|
55 |
+
#π retriever
|
56 |
+
async def get_retriever(filename: str, chunks: list[Document]):
    """Build an MMR retriever over *chunks* backed by an in-memory Qdrant collection.

    Args:
        filename: base name of the uploaded PDF; used to derive the collection name.
        chunks: pre-split document chunks to index.

    Returns:
        Tuple ``(retriever, already_exist)`` — ``already_exist`` is True when the
        collection was found and the chunks were NOT re-indexed.

    NOTE(review): the client is created fresh with ":memory:" on every call, so
    within one process the collection can never pre-exist; the reuse branch only
    becomes reachable if this is refactored to a persistent/shared client.
    """
    client = QdrantClient(":memory:")

    core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    # Cache embeddings on disk so re-uploading the same document is cheap.
    cached_embedder = CacheBackedEmbeddings.from_bytes_store(
        underlying_embeddings=core_embeddings,
        document_embedding_cache=VECTOR_STORE_CACHE,
        namespace=core_embeddings.model,
    )

    # BUG FIX: the collection name previously ignored ``filename`` entirely
    # (it was a constant f-string with no placeholder); derive it from the
    # uploaded file so distinct PDFs map to distinct collections.
    collection_name = f"pdf_to_parse_{filename}"
    already_exist = collection_name in (
        c.name for c in client.get_collections().collections
    )
    if not already_exist:
        client.create_collection(
            collection_name=collection_name,
            # 1536 is the output dimension of text-embedding-3-small.
            vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
        )

    # Single construction path (the original duplicated this in both branches).
    vectorstore = QdrantVectorStore(
        client=client,
        collection_name=collection_name,
        embedding=cached_embedder,
    )
    if not already_exist:
        vectorstore.add_documents(chunks)

    retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})
    return retriever, already_exist
89 |
+
|
90 |
+
|
91 |
+
def get_rag(retriever):
    """Assemble the streaming RAG chain.

    Pipeline: fan out the raw question into {context} (via *retriever*) and
    {question}, render RAG_PROMPT, call the chat model, parse to a string.
    """
    llm = ChatOpenAI(model="gpt-4o-mini", streaming=True)
    inputs = RunnableParallel(
        context=retriever,
        question=lambda x: x,
    )
    chain = inputs | RAG_PROMPT | llm | StrOutputParser()
    # Tag the chain so traces/logs show up under a readable run name.
    return chain.with_config({'run_name': 'RAG'})
100 |
+
|
101 |
+
|
102 |
+
|
103 |
+
|
104 |
+
|
105 |
+
### On Chat Start (Session Start) Section ###
|
106 |
+
@cl.on_chat_start
async def on_chat_start():
    """Session start: ask the user for a PDF, chunk and index it, and store the
    resulting RAG chain in the user session for use by the message handler."""
    files = None

    # Wait for the user to upload a file (AskFileMessage returns None on timeout,
    # so keep re-prompting). IDIOM FIX: `is None` instead of `== None`.
    while files is None:
        files = await cl.AskFileMessage(
            content="Hello!! I'm Jet! Please upload a Pdf File file to begin!",
            accept=["application/pdf"],
            max_size_mb=10,
            timeout=180,
        ).send()

    file = files[0]
    msg = cl.Message(content=f"Processing `{file.name}`...")
    await msg.send()

    # Load the PDF and split it into overlapping chunks for retrieval.
    documents = PyMuPDFLoader(file.path).load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = await text_splitter.atransform_documents(documents)

    # get rag chain (filename with the "pdf" suffix stripped keys the collection)
    retriever, already_exist = await get_retriever(file.name.split('pdf')[0], chunks)
    rag_chain = get_rag(retriever)

    # Let the user know that the system is ready.
    if not already_exist:
        msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    else:
        # FIX: grammar ("already exists") and dropped the pointless f-prefix.
        msg.content = "VectorStore already exists. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", rag_chain)
142 |
+
|
143 |
+
|
144 |
+
|
145 |
+
### Rename Chains ###
|
146 |
+
@cl.author_rename
|
147 |
+
def rename(orig_author: str):
    """Map internal author labels to the display names shown in the chat UI.

    Unknown authors pass through unchanged.
    """
    display_names = {"Assistant": "Jet"}
    return display_names.get(orig_author, orig_author)
151 |
+
|
152 |
+
|
153 |
+
### On Message Section ###
|
154 |
+
@cl.on_message
async def main(message):
    """Handle an incoming user message.

    Serve an exact-match answer from the E2E cache when available; otherwise
    stream a fresh answer from the session's RAG chain and cache the result.
    """
    cached_answer = load_cached_response({'question': message.content})
    if cached_answer:
        # Cache hit: reply immediately without touching the LLM.
        msg = cl.Message(content=cached_answer)
        await msg.send()
        return

    chain = cast(Runnable, cl.user_session.get("chain"))

    msg = cl.Message(content="")
    # Stream tokens to the UI as the chain produces them.
    async for token in chain.astream(message.content):
        await msg.stream_token(token)

    # Persist the finished answer so identical questions skip the LLM next time.
    caching_rag_respnse(question=message.content, answer=msg.content)

    await msg.send()
174 |
+
|
175 |
+
|
chainlit.md
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Welcome to Chainlit! ππ€
|
2 |
+
|
3 |
+
Hi there, Developer! π We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
|
4 |
+
|
5 |
+
## Useful Links π
|
6 |
+
|
7 |
+
- **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) π
|
8 |
+
- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! π¬
|
9 |
+
|
10 |
+
We can't wait to see what you create with Chainlit! Happy coding! π»π
|
11 |
+
|
12 |
+
## Welcome screen
|
13 |
+
|
14 |
+
To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
|
compose.yml
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
version: '3.8' # Docker Compose file version

services:
  webapp:
    image: week8:latest # The name of your Docker image
    container_name: test # Name of the container
    ports:
      - "7860:7860" # Map port 7860 on the host to port 7860 in the container
    volumes:
      # Named volumes keep the embedding and response caches across restarts.
      - vector_store_cache:/home/user/app/VECTOR_STORE_CACHE
      - e2e_cache:/home/user/app/E2E_CACHE
    command: chainlit run app.py --host 0.0.0.0 --port 7860

volumes:
  vector_store_cache:
  e2e_cache:
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
chainlit==1.2.0
|
2 |
+
langchain==0.3.1
|
3 |
+
langchain-openai==0.2.1
|
4 |
+
langchain-community==0.3.1
|
5 |
+
qdrant-client==1.11.3
|
6 |
+
langchain-qdrant==0.1.4
|
7 |
+
PyMuPDF==1.24.10
|
8 |
+
python-dotenv==1.0.1
|