jjovalle commited on
Commit
32efd97
β€’
1 Parent(s): 61387c0
.chainlit/config.toml ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ # Whether to enable telemetry (default: true). No personal data is collected.
3
+ enable_telemetry = true
4
+
5
+
6
+ # List of environment variables to be provided by each user to use the app.
7
+ user_env = []
8
+
9
+ # Duration (in seconds) during which the session is saved when the connection is lost
10
+ session_timeout = 3600
11
+
12
+ # Enable third parties caching (e.g LangChain cache)
13
+ cache = false
14
+
15
+ # Authorized origins
16
+ allow_origins = ["*"]
17
+
18
+ # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
19
+ # follow_symlink = false
20
+
21
+ [features]
22
+ # Show the prompt playground
23
+ prompt_playground = false
24
+
25
+ # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
26
+ unsafe_allow_html = false
27
+
28
+ # Process and display mathematical expressions. This can clash with "$" characters in messages.
29
+ latex = false
30
+
31
+ # Authorize users to upload files with messages
32
+ multi_modal = true
33
+
34
+ # Allows user to use speech to text
35
+ [features.speech_to_text]
36
+ enabled = false
37
+ # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
38
+ # language = "en-US"
39
+
40
+ [UI]
41
+ # Name of the app and chatbot.
42
+ name = "Chatbot"
43
+
44
+ # Show the readme while the thread is empty.
45
+ show_readme_as_default = true
46
+
47
+ # Description of the app and chatbot. This is used for HTML tags.
48
+ # description = ""
49
+
50
+ # Large size content are by default collapsed for a cleaner ui
51
+ default_collapse_content = true
52
+
53
+ # The default value for the expand messages settings.
54
+ default_expand_messages = false
55
+
56
+ # Hide the chain of thought details from the user in the UI.
57
+ hide_cot = false
58
+
59
+ # Link to your github repo. This will add a github button in the UI's header.
60
+ # github = ""
61
+
62
+ # Specify a CSS file that can be used to customize the user interface.
63
+ # The CSS file can be served from the public directory or via an external link.
64
+ # custom_css = "/public/test.css"
65
+
66
+ # Specify a Javascript file that can be used to customize the user interface.
67
+ # The Javascript file can be served from the public directory.
68
+ # custom_js = "/public/test.js"
69
+
70
+ # Specify a custom font url.
71
+ # custom_font = "https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap"
72
+
73
+ # Override default MUI light theme. (Check theme.ts)
74
+ [UI.theme]
75
+ #font_family = "Inter, sans-serif"
76
+ [UI.theme.light]
77
+ #background = "#FAFAFA"
78
+ #paper = "#FFFFFF"
79
+
80
+ [UI.theme.light.primary]
81
+ #main = "#F80061"
82
+ #dark = "#980039"
83
+ #light = "#FFE7EB"
84
+
85
+ # Override default MUI dark theme. (Check theme.ts)
86
+ [UI.theme.dark]
87
+ #background = "#FAFAFA"
88
+ #paper = "#FFFFFF"
89
+
90
+ [UI.theme.dark.primary]
91
+ #main = "#F80061"
92
+ #dark = "#980039"
93
+ #light = "#FFE7EB"
94
+
95
+
96
+ [meta]
97
+ generated_by = "1.0.400"
.chainlit/translations/en-US.json ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "components": {
3
+ "atoms": {
4
+ "buttons": {
5
+ "userButton": {
6
+ "menu": {
7
+ "settings": "Settings",
8
+ "settingsKey": "S",
9
+ "APIKeys": "API Keys",
10
+ "logout": "Logout"
11
+ }
12
+ }
13
+ }
14
+ },
15
+ "molecules": {
16
+ "newChatButton": {
17
+ "newChat": "New Chat"
18
+ },
19
+ "tasklist": {
20
+ "TaskList": {
21
+ "title": "\ud83d\uddd2\ufe0f Task List",
22
+ "loading": "Loading...",
23
+ "error": "An error occured"
24
+ }
25
+ },
26
+ "attachments": {
27
+ "cancelUpload": "Cancel upload",
28
+ "removeAttachment": "Remove attachment"
29
+ },
30
+ "newChatDialog": {
31
+ "createNewChat": "Create new chat?",
32
+ "clearChat": "This will clear the current messages and start a new chat.",
33
+ "cancel": "Cancel",
34
+ "confirm": "Confirm"
35
+ },
36
+ "settingsModal": {
37
+ "expandMessages": "Expand Messages",
38
+ "hideChainOfThought": "Hide Chain of Thought",
39
+ "darkMode": "Dark Mode"
40
+ }
41
+ },
42
+ "organisms": {
43
+ "chat": {
44
+ "history": {
45
+ "index": {
46
+ "lastInputs": "Last Inputs",
47
+ "noInputs": "Such empty...",
48
+ "loading": "Loading..."
49
+ }
50
+ },
51
+ "inputBox": {
52
+ "input": {
53
+ "placeholder": "Type your message here..."
54
+ },
55
+ "speechButton": {
56
+ "start": "Start recording",
57
+ "stop": "Stop recording"
58
+ },
59
+ "SubmitButton": {
60
+ "sendMessage": "Send message",
61
+ "stopTask": "Stop Task"
62
+ },
63
+ "UploadButton": {
64
+ "attachFiles": "Attach files"
65
+ },
66
+ "waterMark": {
67
+ "text": "Built with"
68
+ }
69
+ },
70
+ "Messages": {
71
+ "index": {
72
+ "running": "Running",
73
+ "executedSuccessfully": "executed successfully",
74
+ "failed": "failed",
75
+ "feedbackUpdated": "Feedback updated",
76
+ "updating": "Updating"
77
+ }
78
+ },
79
+ "dropScreen": {
80
+ "dropYourFilesHere": "Drop your files here"
81
+ },
82
+ "index": {
83
+ "failedToUpload": "Failed to upload",
84
+ "cancelledUploadOf": "Cancelled upload of",
85
+ "couldNotReachServer": "Could not reach the server",
86
+ "continuingChat": "Continuing previous chat"
87
+ },
88
+ "settings": {
89
+ "settingsPanel": "Settings panel",
90
+ "reset": "Reset",
91
+ "cancel": "Cancel",
92
+ "confirm": "Confirm"
93
+ }
94
+ },
95
+ "threadHistory": {
96
+ "sidebar": {
97
+ "filters": {
98
+ "FeedbackSelect": {
99
+ "feedbackAll": "Feedback: All",
100
+ "feedbackPositive": "Feedback: Positive",
101
+ "feedbackNegative": "Feedback: Negative"
102
+ },
103
+ "SearchBar": {
104
+ "search": "Search"
105
+ }
106
+ },
107
+ "DeleteThreadButton": {
108
+ "confirmMessage": "This will delete the thread as well as it's messages and elements.",
109
+ "cancel": "Cancel",
110
+ "confirm": "Confirm",
111
+ "deletingChat": "Deleting chat",
112
+ "chatDeleted": "Chat deleted"
113
+ },
114
+ "index": {
115
+ "pastChats": "Past Chats"
116
+ },
117
+ "ThreadList": {
118
+ "empty": "Empty..."
119
+ },
120
+ "TriggerButton": {
121
+ "closeSidebar": "Close sidebar",
122
+ "openSidebar": "Open sidebar"
123
+ }
124
+ },
125
+ "Thread": {
126
+ "backToChat": "Go back to chat",
127
+ "chatCreatedOn": "This chat was created on"
128
+ }
129
+ },
130
+ "header": {
131
+ "chat": "Chat",
132
+ "readme": "Readme"
133
+ }
134
+ }
135
+ },
136
+ "hooks": {
137
+ "useLLMProviders": {
138
+ "failedToFetchProviders": "Failed to fetch providers:"
139
+ }
140
+ },
141
+ "pages": {
142
+ "Design": {},
143
+ "Env": {
144
+ "savedSuccessfully": "Saved successfully",
145
+ "requiredApiKeys": "Required API Keys",
146
+ "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
147
+ },
148
+ "Page": {
149
+ "notPartOfProject": "You are not part of this project."
150
+ },
151
+ "ResumeButton": {
152
+ "resumeChat": "Resume Chat"
153
+ }
154
+ }
155
+ }
.chainlit/translations/pt-BR.json ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "components": {
3
+ "atoms": {
4
+ "buttons": {
5
+ "userButton": {
6
+ "menu": {
7
+ "settings": "Configura\u00e7\u00f5es",
8
+ "settingsKey": "S",
9
+ "APIKeys": "Chaves de API",
10
+ "logout": "Sair"
11
+ }
12
+ }
13
+ }
14
+ },
15
+ "molecules": {
16
+ "newChatButton": {
17
+ "newChat": "Nova Conversa"
18
+ },
19
+ "tasklist": {
20
+ "TaskList": {
21
+ "title": "\ud83d\uddd2\ufe0f Lista de Tarefas",
22
+ "loading": "Carregando...",
23
+ "error": "Ocorreu um erro"
24
+ }
25
+ },
26
+ "attachments": {
27
+ "cancelUpload": "Cancelar envio",
28
+ "removeAttachment": "Remover anexo"
29
+ },
30
+ "newChatDialog": {
31
+ "createNewChat": "Criar novo chat?",
32
+ "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.",
33
+ "cancel": "Cancelar",
34
+ "confirm": "Confirmar"
35
+ },
36
+ "settingsModal": {
37
+ "expandMessages": "Expandir Mensagens",
38
+ "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento",
39
+ "darkMode": "Modo Escuro"
40
+ }
41
+ },
42
+ "organisms": {
43
+ "chat": {
44
+ "history": {
45
+ "index": {
46
+ "lastInputs": "\u00daltimas Entradas",
47
+ "noInputs": "Vazio...",
48
+ "loading": "Carregando..."
49
+ }
50
+ },
51
+ "inputBox": {
52
+ "input": {
53
+ "placeholder": "Digite sua mensagem aqui..."
54
+ },
55
+ "speechButton": {
56
+ "start": "Iniciar grava\u00e7\u00e3o",
57
+ "stop": "Parar grava\u00e7\u00e3o"
58
+ },
59
+ "SubmitButton": {
60
+ "sendMessage": "Enviar mensagem",
61
+ "stopTask": "Parar Tarefa"
62
+ },
63
+ "UploadButton": {
64
+ "attachFiles": "Anexar arquivos"
65
+ },
66
+ "waterMark": {
67
+ "text": "Constru\u00eddo com"
68
+ }
69
+ },
70
+ "Messages": {
71
+ "index": {
72
+ "running": "Executando",
73
+ "executedSuccessfully": "executado com sucesso",
74
+ "failed": "falhou",
75
+ "feedbackUpdated": "Feedback atualizado",
76
+ "updating": "Atualizando"
77
+ }
78
+ },
79
+ "dropScreen": {
80
+ "dropYourFilesHere": "Solte seus arquivos aqui"
81
+ },
82
+ "index": {
83
+ "failedToUpload": "Falha ao enviar",
84
+ "cancelledUploadOf": "Envio cancelado de",
85
+ "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor",
86
+ "continuingChat": "Continuando o chat anterior"
87
+ },
88
+ "settings": {
89
+ "settingsPanel": "Painel de Configura\u00e7\u00f5es",
90
+ "reset": "Redefinir",
91
+ "cancel": "Cancelar",
92
+ "confirm": "Confirmar"
93
+ }
94
+ },
95
+ "threadHistory": {
96
+ "sidebar": {
97
+ "filters": {
98
+ "FeedbackSelect": {
99
+ "feedbackAll": "Feedback: Todos",
100
+ "feedbackPositive": "Feedback: Positivo",
101
+ "feedbackNegative": "Feedback: Negativo"
102
+ },
103
+ "SearchBar": {
104
+ "search": "Buscar"
105
+ }
106
+ },
107
+ "DeleteThreadButton": {
108
+ "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.",
109
+ "cancel": "Cancelar",
110
+ "confirm": "Confirmar",
111
+ "deletingChat": "Deletando conversa",
112
+ "chatDeleted": "Conversa deletada"
113
+ },
114
+ "index": {
115
+ "pastChats": "Conversas Anteriores"
116
+ },
117
+ "ThreadList": {
118
+ "empty": "Vazio..."
119
+ },
120
+ "TriggerButton": {
121
+ "closeSidebar": "Fechar barra lateral",
122
+ "openSidebar": "Abrir barra lateral"
123
+ }
124
+ },
125
+ "Thread": {
126
+ "backToChat": "Voltar para a conversa",
127
+ "chatCreatedOn": "Esta conversa foi criada em"
128
+ }
129
+ },
130
+ "header": {
131
+ "chat": "Conversa",
132
+ "readme": "Leia-me"
133
+ }
134
+ },
135
+ "hooks": {
136
+ "useLLMProviders": {
137
+ "failedToFetchProviders": "Falha ao buscar provedores:"
138
+ }
139
+ },
140
+ "pages": {
141
+ "Design": {},
142
+ "Env": {
143
+ "savedSuccessfully": "Salvo com sucesso",
144
+ "requiredApiKeys": "Chaves de API necess\u00e1rias",
145
+ "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo."
146
+ },
147
+ "Page": {
148
+ "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto."
149
+ },
150
+ "ResumeButton": {
151
+ "resumeChat": "Continuar Conversa"
152
+ }
153
+ }
154
+ }
155
+ }
app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+
3
+ load_dotenv()
4
+
5
+ import logging
6
+ import os
7
+ from time import perf_counter
8
+
9
+ import chainlit as cl
10
+ from langchain.cache import InMemoryCache
11
+ from langchain.embeddings import CacheBackedEmbeddings
12
+ from langchain.globals import set_llm_cache
13
+ from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
14
+ from langchain.schema import SystemMessage
15
+ from langchain.schema.output_parser import StrOutputParser
16
+ from langchain.schema.runnable import RunnableParallel, RunnablePassthrough
17
+ from langchain.storage import LocalFileStore
18
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
19
+ from langchain_openai.chat_models import ChatOpenAI
20
+ from langchain_openai.embeddings import OpenAIEmbeddings
21
+ from langchain_pinecone import PineconeVectorStore
22
+ from pinecone import Index
23
+
24
+ from src.loaders import get_docs
25
+
26
+ # Configure logging
27
+ logging.basicConfig(level=logging.INFO)
28
+ logger = logging.getLogger(__name__)
29
+ os.environ["LANGCHAIN_WANDB_TRACING"] = "true"
30
+
31
+ try:
32
+ set_llm_cache(InMemoryCache()) # Use in-memory cache for LLM
33
+
34
+ index = Index(
35
+ api_key=os.environ["PINECONE_API_KEY"],
36
+ index_name=os.environ["PINECONE_INDEX"],
37
+ host=os.environ["PINECONE_HOST"],
38
+ )
39
+
40
+ store = LocalFileStore("./cache/")
41
+ embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
42
+ embeddings = CacheBackedEmbeddings.from_bytes_store(
43
+ underlying_embeddings=embeddings,
44
+ document_embedding_cache=store,
45
+ namespace=embeddings.model,
46
+ )
47
+
48
+ vectorstore = PineconeVectorStore(
49
+ index=index, embedding=embeddings, text_key="text"
50
+ )
51
+
52
+ system_message = SystemMessage(content="You are a helpful assistant.")
53
+ human_template = """Based on the following context generate an answer for the question. If the answer is not available say I dont know.
54
+ Context: {context}
55
+
56
+ Question: {question}
57
+
58
+ Answer:"""
59
+ human_message = HumanMessagePromptTemplate.from_template(template=human_template)
60
+ llm = ChatOpenAI(model="gpt-3.5-turbo-1106", streaming=True)
61
+ prompt = ChatPromptTemplate.from_messages(messages=[system_message, human_message])
62
+ parser = StrOutputParser()
63
+ prompt_chain = prompt | llm | parser
64
+ splitter = RecursiveCharacterTextSplitter(
65
+ chunk_size=500, chunk_overlap=100, length_function=len
66
+ )
67
+ logger.info("Initialization completed successfully.")
68
+ except Exception as e:
69
+ logger.exception("Failed during initialization: %s", str(e))
70
+
71
+
72
+ @cl.on_chat_start
73
+ async def start_chat():
74
+ files = await cl.AskFileMessage(
75
+ content="Please upload a PDF file to answer questions from.",
76
+ accept=["application/pdf"],
77
+ max_size_mb=100,
78
+ max_files=10,
79
+ timeout=60 * 60 * 24 * 7 * 365,
80
+ ).send()
81
+
82
+ out = cl.Message(content="")
83
+ await out.send()
84
+
85
+ paths = [file.path for file in files]
86
+ logger.info(files[0])
87
+ logger.info("Preparing docs: %s", paths)
88
+ start = perf_counter()
89
+ splitted_docs = get_docs(files=files, splitter=splitter)
90
+ end = perf_counter()
91
+ logger.info("Preparing docs took %s seconds.", end - start)
92
+
93
+ retriever = vectorstore.as_retriever(
94
+ search_kwargs={"filter": {"source": {"$in": paths}}}
95
+ )
96
+
97
+ logger.info("Adding documents to vector store retriever.")
98
+ start = perf_counter()
99
+ await retriever.aadd_documents(splitted_docs)
100
+ end = perf_counter()
101
+ logger.info("Adding documents took %s seconds.", end - start)
102
+
103
+ cl.user_session.set("retriever", retriever)
104
+ out.content = f"{len(files)} file(s) loaded! You can now ask questions"
105
+ await out.update()
106
+ logger.info("Files loaded and retriever updated.")
107
+
108
+
109
+ @cl.on_message
110
+ async def main(message: cl.Message):
111
+ retriever = cl.user_session.get("retriever")
112
+ retriever_chain = RunnableParallel(
113
+ {"context": retriever, "question": RunnablePassthrough()}
114
+ )
115
+
116
+ out = cl.Message(content="")
117
+ await out.send()
118
+
119
+ chain = retriever_chain | prompt_chain
120
+ stream = chain.astream(message.content)
121
+
122
+ async for chunk in stream:
123
+ await out.stream_token(chunk)
124
+
125
+ await out.update()
chainlit.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Welcome to DocuQuery 2.0: Your PDF & Text File QA Expert πŸ“„πŸ”
2
+
3
+ Hello, Knowledge Seekers! πŸ‘‹ Excited to introduce you to DocuQuery, your smart assistant for navigating and extracting information from PDFs and text files.
4
+ In this new version Pinecone, LangChain and W&B are used!
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "docuquery2-0-1"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["jjovalle <jj.ovalle@uniandes.edu.co>"]
6
+ readme = "README.md"
7
+ packages = [{include = "src"}]
8
+
9
+ [tool.poetry.dependencies]
10
+ python = "~3.11"
11
+ chainlit = "^1.0.400"
12
+ python-dotenv = "^1.0.1"
13
+ langchain = "^0.1.11"
14
+ langchain-pinecone = "^0.0.3"
15
+ langchain-openai = "^0.0.8"
16
+ pypdf = "^4.1.0"
17
+
18
+
19
+ [tool.poetry.group.dev.dependencies]
20
+ black = "^24.2.0"
21
+ isort = "^5.13.2"
22
+ ipykernel = "^6.29.3"
23
+
24
+ [build-system]
25
+ requires = ["poetry-core"]
26
+ build-backend = "poetry.core.masonry.api"
src/__init__.py ADDED
File without changes
src/loaders.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
+ from chainlit.types import AskFileResponse
4
+ from langchain.docstore.document import Document
5
+ from pypdf import PdfReader
6
+
7
+
8
+ def get_docs(files: List[AskFileResponse], splitter) -> List[str]:
9
+ docs = []
10
+ for file in files:
11
+ reader = PdfReader(file.path)
12
+ doc = [
13
+ Document(
14
+ page_content=page.extract_text(),
15
+ metadata={"source": file.path, "page": page.page_number},
16
+ )
17
+ for page in reader.pages
18
+ ]
19
+ docs.append(doc)
20
+ splitted_docs = [splitter.split_documents(doc) for doc in docs]
21
+ for doc in splitted_docs:
22
+ for i, chunk in enumerate(doc, start=1):
23
+ chunk.metadata["chunk"] = i
24
+ unnested_splitted_docs = [chunk for doc in splitted_docs for chunk in doc]
25
+ return unnested_splitted_docs