Jorge Londono committed on
Commit
83233f5
·
1 Parent(s): 849d2d9

Implemented RAG with memory

Browse files
Files changed (2) hide show
  1. app03-chatRagLcelMem.py +108 -104
  2. test.ipynb +32 -34
app03-chatRagLcelMem.py CHANGED
@@ -10,7 +10,10 @@ from operator import itemgetter
10
  from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
11
  from langchain_core.runnables import RunnableParallel,RunnablePassthrough,RunnableLambda
12
  from langchain_core.output_parsers import StrOutputParser
13
- from langchain_core.messages import AIMessage, HumanMessage
 
 
 
14
 
15
  # HuggingFace
16
  from langchain_community.embeddings import HuggingFaceEmbeddings
@@ -32,6 +35,10 @@ load_dotenv()
32
 
33
  setid = "global"
34
 
 
 
 
 
35
  embeddings = HuggingFaceEmbeddings(model_name=os.getenv("EMBEDDINGS_MODEL"))
36
 
37
  # OpenAI
@@ -50,7 +57,7 @@ model = ChatGroq(model_name='mixtral-8x7b-32768')
50
  pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
51
  index = pc.Index(setid)
52
  vectorstore = PineconeVectorStore(index, embeddings, "text")
53
- retriever = vectorstore.as_retriever(kwargs={"k":5}) # Find 5 documents
54
 
55
 
56
  template_no_history = """Answer the question based only on the following context:
@@ -58,142 +65,139 @@ template_no_history = """Answer the question based only on the following context
58
 
59
  Question: {question}
60
  """
61
- PROMPT_NH = ChatPromptTemplate.from_template(template_no_history)
 
 
62
 
63
- template_with_history = """Given the following conversation history, answer the follow up question:
64
  Chat History:
65
  {chat_history}
 
 
 
66
 
67
- Question: {question}
68
- """
69
- PROMPT_WH = ChatPromptTemplate.from_template(template_with_history)
70
-
71
 
72
- def pipeLog(x):
73
- print("***", x)
74
- return x
75
 
76
 
77
- setup_and_retrieval = RunnableParallel(
78
- {"context": retriever, "question": RunnablePassthrough()}
79
- )
80
 
81
- def format_docs(docs):
82
- return "\n\n".join(doc.page_content for doc in docs)
83
 
84
- rag_chain_from_docs = (
85
- RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
86
- | PROMPT_NH
87
- | model
88
- | StrOutputParser()
89
- )
90
 
91
- rag_chain_with_source = RunnableParallel(
92
- {"context": retriever, "question": RunnablePassthrough()}
93
- ).assign(answer=rag_chain_from_docs)
94
-
95
-
96
- def rag_query(question: str, history: list[list[str]]):
97
- if len(history)==0:
98
- # chain = setup_and_retrieval | PROMPT_NH | model
99
- # response = chain.invoke(question)
100
- response = rag_chain_with_source.invoke(question)
101
- sources = [ doc.metadata['source'] for doc in response['context'] ]
102
- print(response, '\n', sources)
103
- return response['answer'] # FAILS!!!
104
- else:
105
- chat_history = ""
106
- for l in history:
107
- chat_history += " : ".join(l)
108
- chat_history += "\n"
109
- chain = (
110
- { "chat_history": itemgetter('chat_history'), "question": itemgetter('question') }
111
- | PROMPT_WH
112
- | pipeLog
113
- | model
114
- )
115
- response = chain.invoke({ "chat_history": chat_history, "question": question })
116
- return response.content
117
 
118
  # ----------------------------------------
 
119
 
 
 
120
 
121
- def pipeLog(s:str, x):
122
- print(s, x)
123
- return x
124
- pipe_a = RunnableLambda(lambda x: pipeLog("a:",x))
125
- pipe_b = RunnableLambda(lambda x: pipeLog("b:",x))
126
-
127
-
128
-
129
- contextualize_q_system_prompt = """Given a chat history and the latest user question \
130
- which might reference context in the chat history, formulate a standalone question \
131
- which can be understood without the chat history. Do NOT answer the question, \
132
- just reformulate it if needed and otherwise return it as is."""
133
-
134
- contextualize_q_prompt = ChatPromptTemplate.from_messages(
135
- [
136
- ("system", contextualize_q_system_prompt),
137
- MessagesPlaceholder(variable_name="chat_history"),
138
- ("human", "{question}"),
139
- ]
140
  )
141
 
142
- contextualize_q_chain = contextualize_q_prompt | model | StrOutputParser()
143
-
 
 
 
 
 
 
 
 
144
 
 
 
 
 
 
145
 
 
 
 
 
 
146
 
147
- qa_system_prompt = """You are an assistant for question-answering tasks.
148
- Use the following pieces of retrieved context to answer the question.
149
- If you don't know the answer, just say that you don't know.
150
- Use three sentences maximum and keep the answer concise.
 
151
 
152
- {context}"""
153
- qa_prompt = ChatPromptTemplate.from_messages(
154
- [
155
- ("system", qa_system_prompt),
156
- MessagesPlaceholder(variable_name="chat_history"),
157
- ("human", "{question}"),
158
- ]
159
- )
160
 
161
- def contextualized_question(input: dict):
162
- if input.get("chat_history"):
163
- return contextualize_q_chain
164
- else:
165
- return input["question"]
166
 
167
-
168
- rag_chain = (
169
- RunnablePassthrough.assign(
170
- context=pipe_b | contextualized_question | retriever | format_docs
171
- )
172
- | qa_prompt
173
- | model
174
- )
175
-
176
- rag_chain_with_source = RunnableParallel(
177
- {"xx": pipe_a, "context": itemgetter('question')|retriever, "question": itemgetter('question'), "chat_history": itemgetter('chat_history') }
178
- ).assign(answer=rag_chain)
179
 
180
 
181
 
182
- def rag_query_2(question: str, history: list[list[str]]):
183
- response = rag_chain_with_source.invoke({ 'question':question, 'chat_history':history })
184
- print(response)
 
 
 
185
  # sources = [ doc.metadata['source'] for doc in response['context'] ]
186
  # print(response, '\n', sources)
187
  return response['answer'].content
188
 
189
 
 
 
 
 
190
 
191
-
192
-
 
193
 
194
 
195
  gr.ChatInterface(
196
- rag_query_2,
197
  title="RAG Chatbot demo",
198
  description="A chatbot doing Retrieval Augmented Generation, backed by a Pinecone vector database"
199
  ).launch()
 
10
  from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
11
  from langchain_core.runnables import RunnableParallel,RunnablePassthrough,RunnableLambda
12
  from langchain_core.output_parsers import StrOutputParser
13
+ from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
14
+ from langchain.prompts.prompt import PromptTemplate
15
+ from langchain.schema import format_document
16
+ from langchain.memory import ConversationBufferMemory
17
 
18
  # HuggingFace
19
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
35
 
36
  setid = "global"
37
 
38
+ def pipeLog(x):
39
+ print("***", x)
40
+ return x
41
+
42
  embeddings = HuggingFaceEmbeddings(model_name=os.getenv("EMBEDDINGS_MODEL"))
43
 
44
  # OpenAI
 
57
  pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
58
  index = pc.Index(setid)
59
  vectorstore = PineconeVectorStore(index, embeddings, "text")
60
+ retriever = vectorstore.as_retriever(kwargs={"k":5}) # Find top-5 documents
61
 
62
 
63
  template_no_history = """Answer the question based only on the following context:
 
65
 
66
  Question: {question}
67
  """
68
+ ANSWER_PROMPT = ChatPromptTemplate.from_template(template_no_history)
69
+
70
+ template_with_history = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
71
 
 
72
  Chat History:
73
  {chat_history}
74
+ Follow Up Input: {question}
75
+ Standalone question:"""
76
+ CONDENSE_QUESTION_PROMPT = ChatPromptTemplate.from_template(template_with_history)
77
 
78
+ DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
 
 
 
79
 
80
+ def _combine_documents(docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"):
81
+ doc_strings = [format_document(doc, document_prompt) for doc in docs]
82
+ return document_separator.join(doc_strings)
83
 
84
 
85
+ # setup_and_retrieval = RunnableParallel(
86
+ # {"context": retriever, "question": RunnablePassthrough()}
87
+ # )
88
 
89
+ # def format_docs(docs):
90
+ # return "\n\n".join(doc.page_content for doc in docs)
91
 
92
+ # rag_chain_from_docs = (
93
+ # RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
94
+ # | PROMPT_NH
95
+ # | model
96
+ # | StrOutputParser()
97
+ # )
98
 
99
+ # rag_chain_with_source = RunnableParallel(
100
+ # {"context": retriever, "question": RunnablePassthrough()}
101
+ # ).assign(answer=rag_chain_from_docs)
102
+
103
+
104
+ # def rag_query(question: str, history: list[list[str]]):
105
+ # if len(history)==0:
106
+ # # chain = setup_and_retrieval | PROMPT_NH | model
107
+ # # response = chain.invoke(question)
108
+ # response = rag_chain_with_source.invoke(question)
109
+ # sources = [ doc.metadata['source'] for doc in response['context'] ]
110
+ # print(response, '\n', sources)
111
+ # return response['answer'] # FAILS!!!
112
+ # else:
113
+ # chat_history = ""
114
+ # for l in history:
115
+ # chat_history += " : ".join(l)
116
+ # chat_history += "\n"
117
+ # chain = (
118
+ # { "chat_history": itemgetter('chat_history'), "question": itemgetter('question') }
119
+ # | PROMPT_WH
120
+ # | pipeLog
121
+ # | model
122
+ # )
123
+ # response = chain.invoke({ "chat_history": chat_history, "question": question })
124
+ # return response.content
125
 
126
  # ----------------------------------------
127
+ # Prepare the chain to run the queries
128
 
129
+ # Store chat history
130
+ memory = ConversationBufferMemory(return_messages=True, output_key="answer", input_key="question")
131
 
132
+ # Load the stored chat history into the 'chat_history' key of the chain input
133
+ loaded_memory = RunnablePassthrough.assign(
134
+ chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  )
136
 
137
+ # Generate a standalone question
138
+ standalone_question = {
139
+ "standalone_question": {
140
+ "question": lambda x: x["question"],
141
+ "chat_history": lambda x: get_buffer_string(x["chat_history"]),
142
+ }
143
+ | CONDENSE_QUESTION_PROMPT
144
+ | model
145
+ | StrOutputParser(),
146
+ }
147
 
148
+ # Retrieve related documents
149
+ retrieved_documents = {
150
+ "docs": itemgetter("standalone_question") | retriever,
151
+ "question": lambda x: x["standalone_question"],
152
+ }
153
 
154
+ # Construct the inputs for the final prompt
155
+ final_inputs = {
156
+ "context": lambda x: _combine_documents(x["docs"]),
157
+ "question": itemgetter("question"),
158
+ }
159
 
160
+ # And finally, we do the part that returns the answers
161
+ answer = {
162
+ "answer": final_inputs | ANSWER_PROMPT | model,
163
+ "docs": itemgetter("docs"),
164
+ }
165
 
166
+ # The complete chain
167
+ final_chain = loaded_memory | standalone_question | retrieved_documents | answer
 
 
 
 
 
 
168
 
 
 
 
 
 
169
 
170
+ def pipeLog(s:str, x):
171
+ print(s, x)
172
+ return x
173
+ pipe_a = RunnableLambda(lambda x: pipeLog("a:",x))
174
+ pipe_b = RunnableLambda(lambda x: pipeLog("b:",x))
 
 
 
 
 
 
 
175
 
176
 
177
 
178
+ def rag_query(question: str, history: list[list[str]]) -> str:
179
+ """Run a RAG query using own history, not the gradio history"""
180
+ inputs = { 'question':question }
181
+ response = final_chain.invoke(inputs)
182
+ # print(response)
183
+ memory.save_context(inputs, {"answer": response["answer"].content})
184
  # sources = [ doc.metadata['source'] for doc in response['context'] ]
185
  # print(response, '\n', sources)
186
  return response['answer'].content
187
 
188
 
189
+ def test_query(question):
190
+ print('QUESTION:', question)
191
+ answer = rag_query(question, None)
192
+ print('ANSWER: ', answer, '\n')
193
 
194
+ # test_query("What is the capital of France?")
195
+ # test_query("What is a Blockchain?")
196
+ # test_query("What is it useful for?")
197
 
198
 
199
  gr.ChatInterface(
200
+ rag_query,
201
  title="RAG Chatbot demo",
202
  description="A chatbot doing Retrieval Augmented Generation, backed by a Pinecone vector database"
203
  ).launch()
test.ipynb CHANGED
@@ -20,7 +20,7 @@
20
  },
21
  {
22
  "cell_type": "code",
23
- "execution_count": 27,
24
  "metadata": {},
25
  "outputs": [],
26
  "source": [
@@ -91,7 +91,7 @@
91
  },
92
  {
93
  "cell_type": "code",
94
- "execution_count": 4,
95
  "metadata": {},
96
  "outputs": [],
97
  "source": [
@@ -100,9 +100,7 @@
100
  "\n",
101
  "embeddings = HuggingFaceEmbeddings(model_name=os.getenv(\"EMBEDDINGS_MODEL\"))\n",
102
  " \n",
103
- "pc = Pinecone(\n",
104
- " api_key=os.getenv(\"PINECONE_API_KEY\")\n",
105
- " )\n",
106
  "index = pc.Index(setid)\n",
107
  "vectorstore = PineconeVectorStore(index, embeddings, \"text\")\n",
108
  "retriever = vectorstore.as_retriever(kwargs={\"k\":5}) # Find 5 documents\n"
@@ -117,7 +115,7 @@
117
  },
118
  {
119
  "cell_type": "code",
120
- "execution_count": 5,
121
  "metadata": {},
122
  "outputs": [],
123
  "source": [
@@ -139,14 +137,14 @@
139
  },
140
  {
141
  "cell_type": "code",
142
- "execution_count": 6,
143
  "metadata": {},
144
  "outputs": [
145
  {
146
  "name": "stdout",
147
  "output_type": "stream",
148
  "text": [
149
- "content='Based on the provided document, a blockchain is a distributed ledger technology that enables secure and immutable record-keeping of digital transactions. It consists of a chain of blocks, each containing a list of validated and time-stamped transactions. The key features of blockchain include transparency, immutability, security, and decentralization of recorded data in the ledger. Blockchain technology implements a decentralized fully replicated append-only ledger in a peer-to-peer network, where multiple participants, or nodes, maintain copies of the ledger. This distributed consensus mechanism ensures that no single entity has control over the entire network, making it resistant to tampering and censorship. In public blockchains, transparency is achieved by its public nature, allowing members and non-members to view and verify the transactions.'\n"
150
  ]
151
  }
152
  ],
@@ -175,7 +173,7 @@
175
  },
176
  {
177
  "cell_type": "code",
178
- "execution_count": 7,
179
  "metadata": {},
180
  "outputs": [
181
  {
@@ -184,7 +182,7 @@
184
  "'Que es blockchain? : Blockchain es una cadena de bloques\\nPara que se usa : Para registrar transacciones\\n'"
185
  ]
186
  },
187
- "execution_count": 7,
188
  "metadata": {},
189
  "output_type": "execute_result"
190
  }
@@ -205,16 +203,16 @@
205
  },
206
  {
207
  "cell_type": "code",
208
- "execution_count": 8,
209
  "metadata": {},
210
  "outputs": [
211
  {
212
  "data": {
213
  "text/plain": [
214
- "AIMessage(content='En la temática de blockchain, el consenso se refiere al método por el cual todos los nodos de una red descentralizada de blockchain agreement on the current state of the blockchain. In other words, consenso is the way that the nodes reach a consensus on the validity of transactions and the order in which they were received, ensuring that all nodes have the same copy of the blockchain. Examples of consensus algorithms include Proof of Work (PoW) and Proof of Stake (PoS).')"
215
  ]
216
  },
217
- "execution_count": 8,
218
  "metadata": {},
219
  "output_type": "execute_result"
220
  }
@@ -238,7 +236,7 @@
238
  },
239
  {
240
  "cell_type": "code",
241
- "execution_count": 9,
242
  "metadata": {},
243
  "outputs": [],
244
  "source": [
@@ -259,21 +257,21 @@
259
  },
260
  {
261
  "cell_type": "code",
262
- "execution_count": 10,
263
  "metadata": {},
264
  "outputs": [
265
  {
266
  "data": {
267
  "text/plain": [
268
- "{'context': [Document(page_content='the nodes present on the chain maintain a complete local copy of the blockchain. The\\nblockchain is an indigenous technology that has emerged for decentralized applications\\nas the outcome of complication, privacy, and security issues present in the applications\\nover half a century [3,4]. It is a peer-to-peer system that authorizes the users to maintain a\\nledger for various transactions that are reproduced, and remains identical in more than\\none location over multiple user servers [5].\\nA blockchain is essentially a block of chains, with the growing list of records referred\\nto as blocks that are joined with cryptography [4]. Each blockchain contains a hash of a\\nprevious block, and a timestamp that keeps track of the creation and modification time of', metadata={'chunk': 3.0, 'source': 'CustodyBlock-2021.txt'}),\n",
269
- " Document(page_content='customer information [14]. Blockchain is the core strength of IoT so\\xad\\nlutions to build a system with cryptographically protected records that\\nare reluctant to change and inaccuracy. Additionally, Blockchain faces\\nseveral crucial issues intrinsic to the Internet of Things, such as a large\\nnumber of IoT devices, a non-homogeneous network topology, limited\\ncomputational capacity, poor communication bandwidth, etc.', metadata={'chunk': 24.0, 'source': 'BlockchainBased-2023.txt'}),\n",
270
- " Document(page_content='as a distributed ledger technology that enables secure and immutable record-keeping of\\ndigital transactions. It comprises a chain of blocks, each containing a list of validated and\\ntime-stamped transactions. An interesting feature of blockchain is its decentralized nature,\\nwhere multiple participants, or nodes, maintain copies of the ledger. This distributed\\nconsensus mechanism ensures that no single entity has control over the entire network,\\nmaking it resistant to tampering and censorship. Thus, blockchain is ripe for contexts\\ninvolving multiple parties with a need for a reliable and trustworthy ambiance in the\\nregistering of sensitive information, since it can “allow for an audit trail of all operations\\ncarried out between peers without the need for a centralized authority” (Grima et al. 2021).\\nBlockchains can be classified as public, private/permissioned, or hybrid. Public\\nblockchain allows any interested party to be a node in the network and to participate in\\nthe consensus. Registered data can be viewed by members or non-members. In its turn,\\nprivate or permissioned blockchains only allow the participation of authorized members,\\nlimiting data access to such participants. Lastly, hybrid blockchains embed characteristics\\nof both public and private blockchains.\\nThe key features of blockchain include transparency, immutability, security, and decentralization of recorded data in the ledger data. In public blockchains, transparency is\\nachieved by its public nature, allowing members and non-members to view and verify', metadata={'chunk': 6.0, 'source': 'ExploringBC-2023.txt'}),\n",
271
- " Document(page_content='2\\n\\nBackground\\n\\n2.1\\n\\nBlockchain technology\\n\\nThe blockchain technology implements a decentralized fully replicated append-only ledger in a\\npeer-to-peer network, originally employed for the Bitcoin cryptocurrency [7]. All participating\\nnodes maintain a full local copy of the blockchain. The blockchain consists of a sequence\\nof blocks containing the transactions of the ledger. Transactions inside blocks are sorted\\nchronologically and each block contains a cryptographic hash of the previous block in the\\nchain. Nodes create new blocks as they receives transactions, which are broadcast in the\\nnetwork. Once a block is complete, they start the consensus process to convince other nodes\\nto include it in the blockchain. In the original blockchain technology employed in Bitcoin\\nthe consensus process is based on Proof-of-Work (PoW) [7]. With PoW nodes compete with\\neach other in confirming transactions and creating new blocks by solving a mathematical\\npuzzle. While solving a block is a computational intensive task, verifying its validity is easy.\\nTo incentivize such mechanism, solving a block also results in mining a certain amount of\\n\\n\\x0cS. Bonomi, M. Casini, and C. Ciccotelli\\n\\n12:3', metadata={'chunk': 5.0, 'source': 'B-CoC-2020.txt'})],\n",
272
  " 'question': 'What is a blockchain?',\n",
273
- " 'answer': 'A blockchain is a distributed ledger technology that comprises a chain of blocks, each containing a list of validated and time-stamped transactions. It is a decentralized system where multiple participants, or nodes, maintain copies of the ledger, ensuring no single entity has control over the entire network. This mechanism makes it resistant to tampering and censorship. Blockchains can be classified as public, private/permissioned, or hybrid, and feature transparency, immutability, security, and decentralization of recorded data in the ledger data. In public blockchains, transparency is achieved by its public nature, allowing members and non-members to view and verify the transactions.'}"
274
  ]
275
  },
276
- "execution_count": 10,
277
  "metadata": {},
278
  "output_type": "execute_result"
279
  }
@@ -285,17 +283,17 @@
285
  },
286
  {
287
  "cell_type": "code",
288
- "execution_count": 11,
289
  "metadata": {},
290
  "outputs": [
291
  {
292
  "data": {
293
  "text/plain": [
294
  "('What is a blockchain?',\n",
295
- " 'A blockchain is a distributed ledger technology that comprises a chain of blocks, each containing a list of validated and time-stamped transactions. It is a decentralized system where multiple participants, or nodes, maintain copies of the ledger, ensuring no single entity has control over the entire network. This mechanism makes it resistant to tampering and censorship. Blockchains can be classified as public, private/permissioned, or hybrid, and feature transparency, immutability, security, and decentralization of recorded data in the ledger data. In public blockchains, transparency is achieved by its public nature, allowing members and non-members to view and verify the transactions.')"
296
  ]
297
  },
298
- "execution_count": 11,
299
  "metadata": {},
300
  "output_type": "execute_result"
301
  }
@@ -306,17 +304,17 @@
306
  },
307
  {
308
  "cell_type": "code",
309
- "execution_count": 12,
310
  "metadata": {},
311
  "outputs": [
312
  {
313
  "name": "stdout",
314
  "output_type": "stream",
315
  "text": [
316
- "CustodyBlock-2021.txt\n",
 
317
  "BlockchainBased-2023.txt\n",
318
- "ExploringBC-2023.txt\n",
319
- "B-CoC-2020.txt\n"
320
  ]
321
  }
322
  ],
@@ -327,19 +325,19 @@
327
  },
328
  {
329
  "cell_type": "code",
330
- "execution_count": 13,
331
  "metadata": {},
332
  "outputs": [
333
  {
334
  "data": {
335
  "text/plain": [
336
- "['CustodyBlock-2021.txt',\n",
 
337
  " 'BlockchainBased-2023.txt',\n",
338
- " 'ExploringBC-2023.txt',\n",
339
- " 'B-CoC-2020.txt']"
340
  ]
341
  },
342
- "execution_count": 13,
343
  "metadata": {},
344
  "output_type": "execute_result"
345
  }
@@ -1212,7 +1210,7 @@
1212
  "name": "python",
1213
  "nbconvert_exporter": "python",
1214
  "pygments_lexer": "ipython3",
1215
- "version": "3.10.12"
1216
  }
1217
  },
1218
  "nbformat": 4,
 
20
  },
21
  {
22
  "cell_type": "code",
23
+ "execution_count": 2,
24
  "metadata": {},
25
  "outputs": [],
26
  "source": [
 
91
  },
92
  {
93
  "cell_type": "code",
94
+ "execution_count": 5,
95
  "metadata": {},
96
  "outputs": [],
97
  "source": [
 
100
  "\n",
101
  "embeddings = HuggingFaceEmbeddings(model_name=os.getenv(\"EMBEDDINGS_MODEL\"))\n",
102
  " \n",
103
+ "pc = Pinecone( api_key=os.getenv(\"PINECONE_API_KEY\") )\n",
 
 
104
  "index = pc.Index(setid)\n",
105
  "vectorstore = PineconeVectorStore(index, embeddings, \"text\")\n",
106
  "retriever = vectorstore.as_retriever(kwargs={\"k\":5}) # Find 5 documents\n"
 
115
  },
116
  {
117
  "cell_type": "code",
118
+ "execution_count": 6,
119
  "metadata": {},
120
  "outputs": [],
121
  "source": [
 
137
  },
138
  {
139
  "cell_type": "code",
140
+ "execution_count": 7,
141
  "metadata": {},
142
  "outputs": [
143
  {
144
  "name": "stdout",
145
  "output_type": "stream",
146
  "text": [
147
+ "content='Based on the provided document, a blockchain is a type of distributed ledger technology that implements a decentralized, fully replicated append-only ledger in a peer-to-peer network. It consists of a chain of blocks, where each block contains a list of validated and timestamped transactions. Blockchain technology is known for its secure and immutable record-keeping of digital transactions, as well as its resistance to tampering and censorship due to its decentralized nature. In a blockchain network, multiple participants, or nodes, maintain copies of the ledger, and processing and verifying transactions are the responsibility of every node. Blockchain technology can be classified as public, private/permissioned, or hybrid.' response_metadata={'token_usage': {'completion_time': 0.269, 'completion_tokens': 151, 'prompt_time': 1.5510000000000002, 'prompt_tokens': 1712, 'queue_time': None, 'total_time': 1.8200000000000003, 'total_tokens': 1863}, 'model_name': 'mixtral-8x7b-32768', 'system_fingerprint': 'fp_13a4b82d64', 'finish_reason': 'stop', 'logprobs': None}\n"
148
  ]
149
  }
150
  ],
 
173
  },
174
  {
175
  "cell_type": "code",
176
+ "execution_count": 8,
177
  "metadata": {},
178
  "outputs": [
179
  {
 
182
  "'Que es blockchain? : Blockchain es una cadena de bloques\\nPara que se usa : Para registrar transacciones\\n'"
183
  ]
184
  },
185
+ "execution_count": 8,
186
  "metadata": {},
187
  "output_type": "execute_result"
188
  }
 
203
  },
204
  {
205
  "cell_type": "code",
206
+ "execution_count": 9,
207
  "metadata": {},
208
  "outputs": [
209
  {
210
  "data": {
211
  "text/plain": [
212
+ "AIMessage(content='En la tecnología de blockchain, \"consenso\" se refiere al mecanismo por el cual se llega a un acuerdo sobre el estado del registro distribuido. Hay varios algoritmos de consenso, como Proof of Work (PoW) y Proof of Stake (PoS), que se utilizan para asegurar la exactitud y la validez de las transacciones en la red blockchain. El algoritmo de consenso ayuda a evitar la duplicación de entradas y garantiza que las transacciones sean seguras y verificables.\\n\\nEn resumen, consenso en blockchain es el proceso de llegar a un acuerdo sobre el estado del registro distribuido, usando algoritmos para asegurar la exactitud y validez de las transacciones.', response_metadata={'token_usage': {'completion_time': 0.355, 'completion_tokens': 195, 'prompt_time': 0.063, 'prompt_tokens': 68, 'queue_time': None, 'total_time': 0.418, 'total_tokens': 263}, 'model_name': 'mixtral-8x7b-32768', 'system_fingerprint': 'fp_1cc6d039b0', 'finish_reason': 'stop', 'logprobs': None})"
213
  ]
214
  },
215
+ "execution_count": 9,
216
  "metadata": {},
217
  "output_type": "execute_result"
218
  }
 
236
  },
237
  {
238
  "cell_type": "code",
239
+ "execution_count": 10,
240
  "metadata": {},
241
  "outputs": [],
242
  "source": [
 
257
  },
258
  {
259
  "cell_type": "code",
260
+ "execution_count": 11,
261
  "metadata": {},
262
  "outputs": [
263
  {
264
  "data": {
265
  "text/plain": [
266
+ "{'context': [Document(page_content='2\\n\\nBackground\\n\\n2.1\\n\\nBlockchain technology\\n\\nThe blockchain technology implements a decentralized fully replicated append-only ledger in a\\npeer-to-peer network, originally employed for the Bitcoin cryptocurrency [7]. All participating\\nnodes maintain a full local copy of the blockchain. The blockchain consists of a sequence\\nof blocks containing the transactions of the ledger. Transactions inside blocks are sorted\\nchronologically and each block contains a cryptographic hash of the previous block in the\\nchain. Nodes create new blocks as they receives transactions, which are broadcast in the\\nnetwork. Once a block is complete, they start the consensus process to convince other nodes\\nto include it in the blockchain. In the original blockchain technology employed in Bitcoin\\nthe consensus process is based on Proof-of-Work (PoW) [7]. With PoW nodes compete with\\neach other in confirming transactions and creating new blocks by solving a mathematical\\npuzzle. While solving a block is a computational intensive task, verifying its validity is easy.\\nTo incentivize such mechanism, solving a block also results in mining a certain amount of\\n\\n\\x0cS. Bonomi, M. Casini, and C. Ciccotelli\\n\\n12:3', metadata={'chunk': 4.0, 'source': 'B-CoC-2020.txt'}),\n",
267
+ " Document(page_content='2\\n\\nBackground\\n\\n2.1\\n\\nBlockchain technology\\n\\nThe blockchain technology implements a decentralized fully replicated append-only ledger in a\\npeer-to-peer network, originally employed for the Bitcoin cryptocurrency [7]. All participating\\nnodes maintain a full local copy of the blockchain. The blockchain consists of a sequence\\nof blocks containing the transactions of the ledger. Transactions inside blocks are sorted\\nchronologically and each block contains a cryptographic hash of the previous block in the\\nchain. Nodes create new blocks as they receives transactions, which are broadcast in the\\nnetwork. Once a block is complete, they start the consensus process to convince other nodes\\nto include it in the blockchain. In the original blockchain technology employed in Bitcoin\\nthe consensus process is based on Proof-of-Work (PoW) [7]. With PoW nodes compete with\\neach other in confirming transactions and creating new blocks by solving a mathematical\\npuzzle. While solving a block is a computational intensive task, verifying its validity is easy.\\nTo incentivize such mechanism, solving a block also results in mining a certain amount of\\n\\n\\x0cS. Bonomi, M. Casini, and C. Ciccotelli\\n\\n12:3', metadata={'chunk': 4.0, 'source': 'OASIcs-Tokenomics-2019-12.txt'}),\n",
268
+ " Document(page_content='2.5. Components in blockchain technology\\nThe structure of a blockchain is a decentralized database consisting\\nof a chain of blocks that contain transactions, with each block linked to\\nthe previous one through cryptographic hashes, creating an immutable\\nand secure ledger of transactions as shown in Fig. 1. This structure en\\xad\\nables trust and transparency in the network by allowing participants to\\nverify and validate transactions without the need for intermediaries. The\\ncomponent used for blockchain technology are as follows:\\n(a) Node: A node in a blockchain network is a system, or it can be a\\nrouter or switch. It’s possible to create a dispersed network of\\nnodes with equal rights by using a P2P network. Processing and\\nverifying transactions are the exclusive responsibility of every\\nnode in the network [33].\\n(b) Transactions: Transactions are the smallest and most funda\\xad\\nmental part of the Blockchain. In blockchain technology, a record\\nacts as a transaction for payment history that includes the sender\\nand recipient address and a timestamp of the occurrence of a\\ntransaction. In a blockchain network, the storage, analysis, and\\nretrieval of completed transactions are important aspects of\\nmaintaining the integrity and transparency of the network [34].\\n(c) Block: The procedures for block validation are depicted by the\\nblock version number given to each block in the Blockchain. A\\ntimestamp value indicates when the particular block was\\n\\n2.4. Blockchain technology\\nBlockchain technology is a distributed ledger that is immutable,\\n4\\n\\n\\x0cSakshi et al.\\n\\nJournal of Information Security and Applications 77 (2023) 103579\\n\\nFig. 1. Blockchain Structure.', metadata={'chunk': 19.0, 'source': 'BlockchainBased-2023.txt'}),\n",
269
+ " Document(page_content='The review was based on resources from four established scientific databases. A total\\nof 72 resources were found in these databases, of which 26 resources were fully analyzed\\nand provided evidence of the status of the research of blockchain-based solutions to solve\\nproblems related to the chain of custody of physical evidence and of how the current\\nliterature relates to the concept of physical evidence. The final selected resources (37%)\\nsufficiently represented a diverse range of perspectives and findings, enabling this article\\nto draw relevant conclusions and to contribute to the existing knowledge on the topic.\\nThe other sections of this paper are organized as follows. Section 2 provides the main\\nconcepts discussed in this paper, and Section 3 highlights current literature reviews focusing\\non the use of blockchain in the forensic field. Section 4 explains the research methodology.\\nSection 5 provides the results, and Section 6 the discussion. Finally, Section 7 presents the\\nlimitations and proposed future research and Section 8 concludes the paper.\\n2. Background\\nBlockchain technology has emerged as a disruptive innovation, providing a decentralized and transparent environment across various domains. Blockchain can be understood\\nas a distributed ledger technology that enables secure and immutable record-keeping of\\ndigital transactions. It comprises a chain of blocks, each containing a list of validated and\\ntime-stamped transactions. An interesting feature of blockchain is its decentralized nature,\\nwhere multiple participants, or nodes, maintain copies of the ledger. This distributed\\nconsensus mechanism ensures that no single entity has control over the entire network,\\nmaking it resistant to tampering and censorship. 
Thus, blockchain is ripe for contexts\\ninvolving multiple parties with a need for a reliable and trustworthy ambiance in the\\nregistering of sensitive information, since it can “allow for an audit trail of all operations\\ncarried out between peers without the need for a centralized authority” (Grima et al. 2021).\\nBlockchains can be classified as public, private/permissioned, or hybrid. Public\\nblockchain allows any interested party to be a node in the network and to participate in\\nthe consensus. Registered data can be viewed by members or non-members. In its turn,', metadata={'chunk': 3.0, 'source': 'ExploringBC-2023.txt'})],\n",
270
  " 'question': 'What is a blockchain?',\n",
271
+ " 'answer': 'A blockchain is a decentralized fully replicated append-only ledger in a peer-to-peer network, consisting of a chain of blocks containing transactions of the ledger. Each block contains a cryptographic hash of the previous block in the chain, creating an immutable and secure ledger of transactions. The structure enables trust and transparency in the network by allowing participants to verify and validate transactions without the need for intermediaries. It comprises components such as nodes, transactions, and blocks. Nodes maintain a full local copy of the blockchain and are responsible for processing and verifying transactions. Transactions are the smallest and most fundamental part of the blockchain, while blocks are linked to the previous one through cryptographic hashes. The procedures for block validation are depicted by the block version number given to each block in the blockchain. A timestamp value indicates when the particular block was created.'}"
272
  ]
273
  },
274
+ "execution_count": 11,
275
  "metadata": {},
276
  "output_type": "execute_result"
277
  }
 
283
  },
284
  {
285
  "cell_type": "code",
286
+ "execution_count": 12,
287
  "metadata": {},
288
  "outputs": [
289
  {
290
  "data": {
291
  "text/plain": [
292
  "('What is a blockchain?',\n",
293
+ " 'A blockchain is a decentralized fully replicated append-only ledger in a peer-to-peer network, consisting of a chain of blocks containing transactions of the ledger. Each block contains a cryptographic hash of the previous block in the chain, creating an immutable and secure ledger of transactions. The structure enables trust and transparency in the network by allowing participants to verify and validate transactions without the need for intermediaries. It comprises components such as nodes, transactions, and blocks. Nodes maintain a full local copy of the blockchain and are responsible for processing and verifying transactions. Transactions are the smallest and most fundamental part of the blockchain, while blocks are linked to the previous one through cryptographic hashes. The procedures for block validation are depicted by the block version number given to each block in the blockchain. A timestamp value indicates when the particular block was created.')"
294
  ]
295
  },
296
+ "execution_count": 12,
297
  "metadata": {},
298
  "output_type": "execute_result"
299
  }
 
304
  },
305
  {
306
  "cell_type": "code",
307
+ "execution_count": 13,
308
  "metadata": {},
309
  "outputs": [
310
  {
311
  "name": "stdout",
312
  "output_type": "stream",
313
  "text": [
314
+ "B-CoC-2020.txt\n",
315
+ "OASIcs-Tokenomics-2019-12.txt\n",
316
  "BlockchainBased-2023.txt\n",
317
+ "ExploringBC-2023.txt\n"
 
318
  ]
319
  }
320
  ],
 
325
  },
326
  {
327
  "cell_type": "code",
328
+ "execution_count": 14,
329
  "metadata": {},
330
  "outputs": [
331
  {
332
  "data": {
333
  "text/plain": [
334
+ "['B-CoC-2020.txt',\n",
335
+ " 'OASIcs-Tokenomics-2019-12.txt',\n",
336
  " 'BlockchainBased-2023.txt',\n",
337
+ " 'ExploringBC-2023.txt']"
 
338
  ]
339
  },
340
+ "execution_count": 14,
341
  "metadata": {},
342
  "output_type": "execute_result"
343
  }
 
1210
  "name": "python",
1211
  "nbconvert_exporter": "python",
1212
  "pygments_lexer": "ipython3",
1213
+ "version": "3.11.8"
1214
  }
1215
  },
1216
  "nbformat": 4,