Jorge Londono
committed on
Commit
·
83233f5
1
Parent(s):
849d2d9
Implemented RAG with memory
Browse files- app03-chatRagLcelMem.py +108 -104
- test.ipynb +32 -34
app03-chatRagLcelMem.py
CHANGED
@@ -10,7 +10,10 @@ from operator import itemgetter
|
|
10 |
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
11 |
from langchain_core.runnables import RunnableParallel,RunnablePassthrough,RunnableLambda
|
12 |
from langchain_core.output_parsers import StrOutputParser
|
13 |
-
from langchain_core.messages import AIMessage, HumanMessage
|
|
|
|
|
|
|
14 |
|
15 |
# HuggingFace
|
16 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
@@ -32,6 +35,10 @@ load_dotenv()
|
|
32 |
|
33 |
setid = "global"
|
34 |
|
|
|
|
|
|
|
|
|
35 |
embeddings = HuggingFaceEmbeddings(model_name=os.getenv("EMBEDDINGS_MODEL"))
|
36 |
|
37 |
# OpenAI
|
@@ -50,7 +57,7 @@ model = ChatGroq(model_name='mixtral-8x7b-32768')
|
|
50 |
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
|
51 |
index = pc.Index(setid)
|
52 |
vectorstore = PineconeVectorStore(index, embeddings, "text")
|
53 |
-
retriever = vectorstore.as_retriever(kwargs={"k":5}) # Find 5 documents
|
54 |
|
55 |
|
56 |
template_no_history = """Answer the question based only on the following context:
|
@@ -58,142 +65,139 @@ template_no_history = """Answer the question based only on the following context
|
|
58 |
|
59 |
Question: {question}
|
60 |
"""
|
61 |
-
|
|
|
|
|
62 |
|
63 |
-
template_with_history = """Given the following conversation history, answer the follow up question:
|
64 |
Chat History:
|
65 |
{chat_history}
|
|
|
|
|
|
|
66 |
|
67 |
-
|
68 |
-
"""
|
69 |
-
PROMPT_WH = ChatPromptTemplate.from_template(template_with_history)
|
70 |
-
|
71 |
|
72 |
-
def
|
73 |
-
|
74 |
-
return
|
75 |
|
76 |
|
77 |
-
setup_and_retrieval = RunnableParallel(
|
78 |
-
|
79 |
-
)
|
80 |
|
81 |
-
def format_docs(docs):
|
82 |
-
|
83 |
|
84 |
-
rag_chain_from_docs = (
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
)
|
90 |
|
91 |
-
rag_chain_with_source = RunnableParallel(
|
92 |
-
|
93 |
-
).assign(answer=rag_chain_from_docs)
|
94 |
-
|
95 |
-
|
96 |
-
def rag_query(question: str, history: list[list[str]]):
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
|
118 |
# ----------------------------------------
|
|
|
119 |
|
|
|
|
|
120 |
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
pipe_a = RunnableLambda(lambda x: pipeLog("a:",x))
|
125 |
-
pipe_b = RunnableLambda(lambda x: pipeLog("b:",x))
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
contextualize_q_system_prompt = """Given a chat history and the latest user question \
|
130 |
-
which might reference context in the chat history, formulate a standalone question \
|
131 |
-
which can be understood without the chat history. Do NOT answer the question, \
|
132 |
-
just reformulate it if needed and otherwise return it as is."""
|
133 |
-
|
134 |
-
contextualize_q_prompt = ChatPromptTemplate.from_messages(
|
135 |
-
[
|
136 |
-
("system", contextualize_q_system_prompt),
|
137 |
-
MessagesPlaceholder(variable_name="chat_history"),
|
138 |
-
("human", "{question}"),
|
139 |
-
]
|
140 |
)
|
141 |
|
142 |
-
|
143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
|
|
|
|
|
|
|
|
|
|
|
145 |
|
|
|
|
|
|
|
|
|
|
|
146 |
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
|
|
151 |
|
152 |
-
|
153 |
-
|
154 |
-
[
|
155 |
-
("system", qa_system_prompt),
|
156 |
-
MessagesPlaceholder(variable_name="chat_history"),
|
157 |
-
("human", "{question}"),
|
158 |
-
]
|
159 |
-
)
|
160 |
|
161 |
-
def contextualized_question(input: dict):
|
162 |
-
if input.get("chat_history"):
|
163 |
-
return contextualize_q_chain
|
164 |
-
else:
|
165 |
-
return input["question"]
|
166 |
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
| qa_prompt
|
173 |
-
| model
|
174 |
-
)
|
175 |
-
|
176 |
-
rag_chain_with_source = RunnableParallel(
|
177 |
-
{"xx": pipe_a, "context": itemgetter('question')|retriever, "question": itemgetter('question'), "chat_history": itemgetter('chat_history') }
|
178 |
-
).assign(answer=rag_chain)
|
179 |
|
180 |
|
181 |
|
182 |
-
def
|
183 |
-
|
184 |
-
|
|
|
|
|
|
|
185 |
# sources = [ doc.metadata['source'] for doc in response['context'] ]
|
186 |
# print(response, '\n', sources)
|
187 |
return response['answer'].content
|
188 |
|
189 |
|
|
|
|
|
|
|
|
|
190 |
|
191 |
-
|
192 |
-
|
|
|
193 |
|
194 |
|
195 |
gr.ChatInterface(
|
196 |
-
|
197 |
title="RAG Chatbot demo",
|
198 |
description="A chatbot doing Retrieval Augmented Generation, backed by a Pinecone vector database"
|
199 |
).launch()
|
|
|
10 |
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
11 |
from langchain_core.runnables import RunnableParallel,RunnablePassthrough,RunnableLambda
|
12 |
from langchain_core.output_parsers import StrOutputParser
|
13 |
+
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
|
14 |
+
from langchain.prompts.prompt import PromptTemplate
|
15 |
+
from langchain.schema import format_document
|
16 |
+
from langchain.memory import ConversationBufferMemory
|
17 |
|
18 |
# HuggingFace
|
19 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
|
35 |
|
36 |
setid = "global"
|
37 |
|
38 |
+
def pipeLog(x):
|
39 |
+
print("***", x)
|
40 |
+
return x
|
41 |
+
|
42 |
embeddings = HuggingFaceEmbeddings(model_name=os.getenv("EMBEDDINGS_MODEL"))
|
43 |
|
44 |
# OpenAI
|
|
|
57 |
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
|
58 |
index = pc.Index(setid)
|
59 |
vectorstore = PineconeVectorStore(index, embeddings, "text")
|
60 |
+
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})  # Find top-5 documents (k must be passed via search_kwargs)
|
61 |
|
62 |
|
63 |
template_no_history = """Answer the question based only on the following context:
|
|
|
65 |
|
66 |
Question: {question}
|
67 |
"""
|
68 |
+
ANSWER_PROMPT = ChatPromptTemplate.from_template(template_no_history)
|
69 |
+
|
70 |
+
template_with_history = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
|
71 |
|
|
|
72 |
Chat History:
|
73 |
{chat_history}
|
74 |
+
Follow Up Input: {question}
|
75 |
+
Standalone question:"""
|
76 |
+
CONDENSE_QUESTION_PROMPT = ChatPromptTemplate.from_template(template_with_history)
|
77 |
|
78 |
+
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
|
|
|
|
|
|
|
79 |
|
80 |
+
def _combine_documents(docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"):
|
81 |
+
doc_strings = [format_document(doc, document_prompt) for doc in docs]
|
82 |
+
return document_separator.join(doc_strings)
|
83 |
|
84 |
|
85 |
+
# setup_and_retrieval = RunnableParallel(
|
86 |
+
# {"context": retriever, "question": RunnablePassthrough()}
|
87 |
+
# )
|
88 |
|
89 |
+
# def format_docs(docs):
|
90 |
+
# return "\n\n".join(doc.page_content for doc in docs)
|
91 |
|
92 |
+
# rag_chain_from_docs = (
|
93 |
+
# RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
|
94 |
+
# | PROMPT_NH
|
95 |
+
# | model
|
96 |
+
# | StrOutputParser()
|
97 |
+
# )
|
98 |
|
99 |
+
# rag_chain_with_source = RunnableParallel(
|
100 |
+
# {"context": retriever, "question": RunnablePassthrough()}
|
101 |
+
# ).assign(answer=rag_chain_from_docs)
|
102 |
+
|
103 |
+
|
104 |
+
# def rag_query(question: str, history: list[list[str]]):
|
105 |
+
# if len(history)==0:
|
106 |
+
# # chain = setup_and_retrieval | PROMPT_NH | model
|
107 |
+
# # response = chain.invoke(question)
|
108 |
+
# response = rag_chain_with_source.invoke(question)
|
109 |
+
# sources = [ doc.metadata['source'] for doc in response['context'] ]
|
110 |
+
# print(response, '\n', sources)
|
111 |
+
# return response['answer'] # FAILS!!!
|
112 |
+
# else:
|
113 |
+
# chat_history = ""
|
114 |
+
# for l in history:
|
115 |
+
# chat_history += " : ".join(l)
|
116 |
+
# chat_history += "\n"
|
117 |
+
# chain = (
|
118 |
+
# { "chat_history": itemgetter('chat_history'), "question": itemgetter('question') }
|
119 |
+
# | PROMPT_WH
|
120 |
+
# | pipeLog
|
121 |
+
# | model
|
122 |
+
# )
|
123 |
+
# response = chain.invoke({ "chat_history": chat_history, "question": question })
|
124 |
+
# return response.content
|
125 |
|
126 |
# ----------------------------------------
|
127 |
+
# Prepare the chain to run the queries
|
128 |
|
129 |
+
# Store chat history
|
130 |
+
memory = ConversationBufferMemory(return_messages=True, output_key="answer", input_key="question")
|
131 |
|
132 |
+
# Load chat history into the 'chat_history' key
|
133 |
+
loaded_memory = RunnablePassthrough.assign(
|
134 |
+
chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
)
|
136 |
|
137 |
+
# Generate a standalone question
|
138 |
+
standalone_question = {
|
139 |
+
"standalone_question": {
|
140 |
+
"question": lambda x: x["question"],
|
141 |
+
"chat_history": lambda x: get_buffer_string(x["chat_history"]),
|
142 |
+
}
|
143 |
+
| CONDENSE_QUESTION_PROMPT
|
144 |
+
| model
|
145 |
+
| StrOutputParser(),
|
146 |
+
}
|
147 |
|
148 |
+
# Retrieve related documents
|
149 |
+
retrieved_documents = {
|
150 |
+
"docs": itemgetter("standalone_question") | retriever,
|
151 |
+
"question": lambda x: x["standalone_question"],
|
152 |
+
}
|
153 |
|
154 |
+
# Construct the inputs for the final prompt
|
155 |
+
final_inputs = {
|
156 |
+
"context": lambda x: _combine_documents(x["docs"]),
|
157 |
+
"question": itemgetter("question"),
|
158 |
+
}
|
159 |
|
160 |
+
# And finally, we do the part that returns the answers
|
161 |
+
answer = {
|
162 |
+
"answer": final_inputs | ANSWER_PROMPT | model,
|
163 |
+
"docs": itemgetter("docs"),
|
164 |
+
}
|
165 |
|
166 |
+
# The complete chain
|
167 |
+
final_chain = loaded_memory | standalone_question | retrieved_documents | answer
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
|
|
|
|
|
|
|
|
|
|
|
169 |
|
170 |
+
def pipeLog(s:str, x):
|
171 |
+
print(s, x)
|
172 |
+
return x
|
173 |
+
pipe_a = RunnableLambda(lambda x: pipeLog("a:",x))
|
174 |
+
pipe_b = RunnableLambda(lambda x: pipeLog("b:",x))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
|
176 |
|
177 |
|
178 |
+
def rag_query(question: str, history: list[list[str]]) -> str:
|
179 |
+
"""Run a RAG query using own history, not the gradio history"""
|
180 |
+
inputs = { 'question':question }
|
181 |
+
response = final_chain.invoke(inputs)
|
182 |
+
# print(response)
|
183 |
+
memory.save_context(inputs, {"answer": response["answer"].content})
|
184 |
# sources = [ doc.metadata['source'] for doc in response['context'] ]
|
185 |
# print(response, '\n', sources)
|
186 |
return response['answer'].content
|
187 |
|
188 |
|
189 |
+
def test_query(question):
|
190 |
+
print('QUESTION:', question)
|
191 |
+
answer = rag_query(question, None)
|
192 |
+
print('ANSWER: ', answer, '\n')
|
193 |
|
194 |
+
# test_query("What is the capital of France?")
|
195 |
+
# test_query("What is a Blockchain?")
|
196 |
+
# test_query("What is it useful for?")
|
197 |
|
198 |
|
199 |
gr.ChatInterface(
|
200 |
+
rag_query,
|
201 |
title="RAG Chatbot demo",
|
202 |
description="A chatbot doing Retrieval Augmented Generation, backed by a Pinecone vector database"
|
203 |
).launch()
|
test.ipynb
CHANGED
@@ -20,7 +20,7 @@
|
|
20 |
},
|
21 |
{
|
22 |
"cell_type": "code",
|
23 |
-
"execution_count":
|
24 |
"metadata": {},
|
25 |
"outputs": [],
|
26 |
"source": [
|
@@ -91,7 +91,7 @@
|
|
91 |
},
|
92 |
{
|
93 |
"cell_type": "code",
|
94 |
-
"execution_count":
|
95 |
"metadata": {},
|
96 |
"outputs": [],
|
97 |
"source": [
|
@@ -100,9 +100,7 @@
|
|
100 |
"\n",
|
101 |
"embeddings = HuggingFaceEmbeddings(model_name=os.getenv(\"EMBEDDINGS_MODEL\"))\n",
|
102 |
" \n",
|
103 |
-
"pc = Pinecone(\n",
|
104 |
-
" api_key=os.getenv(\"PINECONE_API_KEY\")\n",
|
105 |
-
" )\n",
|
106 |
"index = pc.Index(setid)\n",
|
107 |
"vectorstore = PineconeVectorStore(index, embeddings, \"text\")\n",
|
108 |
"retriever = vectorstore.as_retriever(kwargs={\"k\":5}) # Find 5 documents\n"
|
@@ -117,7 +115,7 @@
|
|
117 |
},
|
118 |
{
|
119 |
"cell_type": "code",
|
120 |
-
"execution_count":
|
121 |
"metadata": {},
|
122 |
"outputs": [],
|
123 |
"source": [
|
@@ -139,14 +137,14 @@
|
|
139 |
},
|
140 |
{
|
141 |
"cell_type": "code",
|
142 |
-
"execution_count":
|
143 |
"metadata": {},
|
144 |
"outputs": [
|
145 |
{
|
146 |
"name": "stdout",
|
147 |
"output_type": "stream",
|
148 |
"text": [
|
149 |
-
"content='Based on the provided document, a blockchain is a distributed ledger technology that
|
150 |
]
|
151 |
}
|
152 |
],
|
@@ -175,7 +173,7 @@
|
|
175 |
},
|
176 |
{
|
177 |
"cell_type": "code",
|
178 |
-
"execution_count":
|
179 |
"metadata": {},
|
180 |
"outputs": [
|
181 |
{
|
@@ -184,7 +182,7 @@
|
|
184 |
"'Que es blockchain? : Blockchain es una cadena de bloques\\nPara que se usa : Para registrar transacciones\\n'"
|
185 |
]
|
186 |
},
|
187 |
-
"execution_count":
|
188 |
"metadata": {},
|
189 |
"output_type": "execute_result"
|
190 |
}
|
@@ -205,16 +203,16 @@
|
|
205 |
},
|
206 |
{
|
207 |
"cell_type": "code",
|
208 |
-
"execution_count":
|
209 |
"metadata": {},
|
210 |
"outputs": [
|
211 |
{
|
212 |
"data": {
|
213 |
"text/plain": [
|
214 |
-
"AIMessage(content='En la
|
215 |
]
|
216 |
},
|
217 |
-
"execution_count":
|
218 |
"metadata": {},
|
219 |
"output_type": "execute_result"
|
220 |
}
|
@@ -238,7 +236,7 @@
|
|
238 |
},
|
239 |
{
|
240 |
"cell_type": "code",
|
241 |
-
"execution_count":
|
242 |
"metadata": {},
|
243 |
"outputs": [],
|
244 |
"source": [
|
@@ -259,21 +257,21 @@
|
|
259 |
},
|
260 |
{
|
261 |
"cell_type": "code",
|
262 |
-
"execution_count":
|
263 |
"metadata": {},
|
264 |
"outputs": [
|
265 |
{
|
266 |
"data": {
|
267 |
"text/plain": [
|
268 |
-
"{'context': [Document(page_content='
|
269 |
-
" Document(page_content='
|
270 |
-
" Document(page_content='
|
271 |
-
" Document(page_content='
|
272 |
" 'question': 'What is a blockchain?',\n",
|
273 |
-
" 'answer': 'A blockchain is a
|
274 |
]
|
275 |
},
|
276 |
-
"execution_count":
|
277 |
"metadata": {},
|
278 |
"output_type": "execute_result"
|
279 |
}
|
@@ -285,17 +283,17 @@
|
|
285 |
},
|
286 |
{
|
287 |
"cell_type": "code",
|
288 |
-
"execution_count":
|
289 |
"metadata": {},
|
290 |
"outputs": [
|
291 |
{
|
292 |
"data": {
|
293 |
"text/plain": [
|
294 |
"('What is a blockchain?',\n",
|
295 |
-
" 'A blockchain is a
|
296 |
]
|
297 |
},
|
298 |
-
"execution_count":
|
299 |
"metadata": {},
|
300 |
"output_type": "execute_result"
|
301 |
}
|
@@ -306,17 +304,17 @@
|
|
306 |
},
|
307 |
{
|
308 |
"cell_type": "code",
|
309 |
-
"execution_count":
|
310 |
"metadata": {},
|
311 |
"outputs": [
|
312 |
{
|
313 |
"name": "stdout",
|
314 |
"output_type": "stream",
|
315 |
"text": [
|
316 |
-
"
|
|
|
317 |
"BlockchainBased-2023.txt\n",
|
318 |
-
"ExploringBC-2023.txt\n"
|
319 |
-
"B-CoC-2020.txt\n"
|
320 |
]
|
321 |
}
|
322 |
],
|
@@ -327,19 +325,19 @@
|
|
327 |
},
|
328 |
{
|
329 |
"cell_type": "code",
|
330 |
-
"execution_count":
|
331 |
"metadata": {},
|
332 |
"outputs": [
|
333 |
{
|
334 |
"data": {
|
335 |
"text/plain": [
|
336 |
-
"['
|
|
|
337 |
" 'BlockchainBased-2023.txt',\n",
|
338 |
-
" 'ExploringBC-2023.txt'
|
339 |
-
" 'B-CoC-2020.txt']"
|
340 |
]
|
341 |
},
|
342 |
-
"execution_count":
|
343 |
"metadata": {},
|
344 |
"output_type": "execute_result"
|
345 |
}
|
@@ -1212,7 +1210,7 @@
|
|
1212 |
"name": "python",
|
1213 |
"nbconvert_exporter": "python",
|
1214 |
"pygments_lexer": "ipython3",
|
1215 |
-
"version": "3.
|
1216 |
}
|
1217 |
},
|
1218 |
"nbformat": 4,
|
|
|
20 |
},
|
21 |
{
|
22 |
"cell_type": "code",
|
23 |
+
"execution_count": 2,
|
24 |
"metadata": {},
|
25 |
"outputs": [],
|
26 |
"source": [
|
|
|
91 |
},
|
92 |
{
|
93 |
"cell_type": "code",
|
94 |
+
"execution_count": 5,
|
95 |
"metadata": {},
|
96 |
"outputs": [],
|
97 |
"source": [
|
|
|
100 |
"\n",
|
101 |
"embeddings = HuggingFaceEmbeddings(model_name=os.getenv(\"EMBEDDINGS_MODEL\"))\n",
|
102 |
" \n",
|
103 |
+
"pc = Pinecone( api_key=os.getenv(\"PINECONE_API_KEY\") )\n",
|
|
|
|
|
104 |
"index = pc.Index(setid)\n",
|
105 |
"vectorstore = PineconeVectorStore(index, embeddings, \"text\")\n",
|
106 |
"retriever = vectorstore.as_retriever(kwargs={\"k\":5}) # Find 5 documents\n"
|
|
|
115 |
},
|
116 |
{
|
117 |
"cell_type": "code",
|
118 |
+
"execution_count": 6,
|
119 |
"metadata": {},
|
120 |
"outputs": [],
|
121 |
"source": [
|
|
|
137 |
},
|
138 |
{
|
139 |
"cell_type": "code",
|
140 |
+
"execution_count": 7,
|
141 |
"metadata": {},
|
142 |
"outputs": [
|
143 |
{
|
144 |
"name": "stdout",
|
145 |
"output_type": "stream",
|
146 |
"text": [
|
147 |
+
"content='Based on the provided document, a blockchain is a type of distributed ledger technology that implements a decentralized, fully replicated append-only ledger in a peer-to-peer network. It consists of a chain of blocks, where each block contains a list of validated and timestamped transactions. Blockchain technology is known for its secure and immutable record-keeping of digital transactions, as well as its resistance to tampering and censorship due to its decentralized nature. In a blockchain network, multiple participants, or nodes, maintain copies of the ledger, and processing and verifying transactions are the responsibility of every node. Blockchain technology can be classified as public, private/permissioned, or hybrid.' response_metadata={'token_usage': {'completion_time': 0.269, 'completion_tokens': 151, 'prompt_time': 1.5510000000000002, 'prompt_tokens': 1712, 'queue_time': None, 'total_time': 1.8200000000000003, 'total_tokens': 1863}, 'model_name': 'mixtral-8x7b-32768', 'system_fingerprint': 'fp_13a4b82d64', 'finish_reason': 'stop', 'logprobs': None}\n"
|
148 |
]
|
149 |
}
|
150 |
],
|
|
|
173 |
},
|
174 |
{
|
175 |
"cell_type": "code",
|
176 |
+
"execution_count": 8,
|
177 |
"metadata": {},
|
178 |
"outputs": [
|
179 |
{
|
|
|
182 |
"'Que es blockchain? : Blockchain es una cadena de bloques\\nPara que se usa : Para registrar transacciones\\n'"
|
183 |
]
|
184 |
},
|
185 |
+
"execution_count": 8,
|
186 |
"metadata": {},
|
187 |
"output_type": "execute_result"
|
188 |
}
|
|
|
203 |
},
|
204 |
{
|
205 |
"cell_type": "code",
|
206 |
+
"execution_count": 9,
|
207 |
"metadata": {},
|
208 |
"outputs": [
|
209 |
{
|
210 |
"data": {
|
211 |
"text/plain": [
|
212 |
+
"AIMessage(content='En la tecnología de blockchain, \"consenso\" se refiere al mecanismo por el cual se llega a un acuerdo sobre el estado del registro distribuido. Hay varios algoritmos de consenso, como Proof of Work (PoW) y Proof of Stake (PoS), que se utilizan para asegurar la exactitud y la validez de las transacciones en la red blockchain. El algoritmo de consenso ayuda a evitar la duplicación de entradas y garantiza que las transacciones sean seguras y verificables.\\n\\nEn resumen, consenso en blockchain es el proceso de llegar a un acuerdo sobre el estado del registro distribuido, usando algoritmos para asegurar la exactitud y validez de las transacciones.', response_metadata={'token_usage': {'completion_time': 0.355, 'completion_tokens': 195, 'prompt_time': 0.063, 'prompt_tokens': 68, 'queue_time': None, 'total_time': 0.418, 'total_tokens': 263}, 'model_name': 'mixtral-8x7b-32768', 'system_fingerprint': 'fp_1cc6d039b0', 'finish_reason': 'stop', 'logprobs': None})"
|
213 |
]
|
214 |
},
|
215 |
+
"execution_count": 9,
|
216 |
"metadata": {},
|
217 |
"output_type": "execute_result"
|
218 |
}
|
|
|
236 |
},
|
237 |
{
|
238 |
"cell_type": "code",
|
239 |
+
"execution_count": 10,
|
240 |
"metadata": {},
|
241 |
"outputs": [],
|
242 |
"source": [
|
|
|
257 |
},
|
258 |
{
|
259 |
"cell_type": "code",
|
260 |
+
"execution_count": 11,
|
261 |
"metadata": {},
|
262 |
"outputs": [
|
263 |
{
|
264 |
"data": {
|
265 |
"text/plain": [
|
266 |
+
"{'context': [Document(page_content='2\\n\\nBackground\\n\\n2.1\\n\\nBlockchain technology\\n\\nThe blockchain technology implements a decentralized fully replicated append-only ledger in a\\npeer-to-peer network, originally employed for the Bitcoin cryptocurrency [7]. All participating\\nnodes maintain a full local copy of the blockchain. The blockchain consists of a sequence\\nof blocks containing the transactions of the ledger. Transactions inside blocks are sorted\\nchronologically and each block contains a cryptographic hash of the previous block in the\\nchain. Nodes create new blocks as they receives transactions, which are broadcast in the\\nnetwork. Once a block is complete, they start the consensus process to convince other nodes\\nto include it in the blockchain. In the original blockchain technology employed in Bitcoin\\nthe consensus process is based on Proof-of-Work (PoW) [7]. With PoW nodes compete with\\neach other in confirming transactions and creating new blocks by solving a mathematical\\npuzzle. While solving a block is a computational intensive task, verifying its validity is easy.\\nTo incentivize such mechanism, solving a block also results in mining a certain amount of\\n\\n\\x0cS. Bonomi, M. Casini, and C. Ciccotelli\\n\\n12:3', metadata={'chunk': 4.0, 'source': 'B-CoC-2020.txt'}),\n",
|
267 |
+
" Document(page_content='2\\n\\nBackground\\n\\n2.1\\n\\nBlockchain technology\\n\\nThe blockchain technology implements a decentralized fully replicated append-only ledger in a\\npeer-to-peer network, originally employed for the Bitcoin cryptocurrency [7]. All participating\\nnodes maintain a full local copy of the blockchain. The blockchain consists of a sequence\\nof blocks containing the transactions of the ledger. Transactions inside blocks are sorted\\nchronologically and each block contains a cryptographic hash of the previous block in the\\nchain. Nodes create new blocks as they receives transactions, which are broadcast in the\\nnetwork. Once a block is complete, they start the consensus process to convince other nodes\\nto include it in the blockchain. In the original blockchain technology employed in Bitcoin\\nthe consensus process is based on Proof-of-Work (PoW) [7]. With PoW nodes compete with\\neach other in confirming transactions and creating new blocks by solving a mathematical\\npuzzle. While solving a block is a computational intensive task, verifying its validity is easy.\\nTo incentivize such mechanism, solving a block also results in mining a certain amount of\\n\\n\\x0cS. Bonomi, M. Casini, and C. Ciccotelli\\n\\n12:3', metadata={'chunk': 4.0, 'source': 'OASIcs-Tokenomics-2019-12.txt'}),\n",
|
268 |
+
" Document(page_content='2.5. Components in blockchain technology\\nThe structure of a blockchain is a decentralized database consisting\\nof a chain of blocks that contain transactions, with each block linked to\\nthe previous one through cryptographic hashes, creating an immutable\\nand secure ledger of transactions as shown in Fig. 1. This structure en\\xad\\nables trust and transparency in the network by allowing participants to\\nverify and validate transactions without the need for intermediaries. The\\ncomponent used for blockchain technology are as follows:\\n(a) Node: A node in a blockchain network is a system, or it can be a\\nrouter or switch. It’s possible to create a dispersed network of\\nnodes with equal rights by using a P2P network. Processing and\\nverifying transactions are the exclusive responsibility of every\\nnode in the network [33].\\n(b) Transactions: Transactions are the smallest and most funda\\xad\\nmental part of the Blockchain. In blockchain technology, a record\\nacts as a transaction for payment history that includes the sender\\nand recipient address and a timestamp of the occurrence of a\\ntransaction. In a blockchain network, the storage, analysis, and\\nretrieval of completed transactions are important aspects of\\nmaintaining the integrity and transparency of the network [34].\\n(c) Block: The procedures for block validation are depicted by the\\nblock version number given to each block in the Blockchain. A\\ntimestamp value indicates when the particular block was\\n\\n2.4. Blockchain technology\\nBlockchain technology is a distributed ledger that is immutable,\\n4\\n\\n\\x0cSakshi et al.\\n\\nJournal of Information Security and Applications 77 (2023) 103579\\n\\nFig. 1. Blockchain Structure.', metadata={'chunk': 19.0, 'source': 'BlockchainBased-2023.txt'}),\n",
|
269 |
+
" Document(page_content='The review was based on resources from four established scientific databases. A total\\nof 72 resources were found in these databases, of which 26 resources were fully analyzed\\nand provided evidence of the status of the research of blockchain-based solutions to solve\\nproblems related to the chain of custody of physical evidence and of how the current\\nliterature relates to the concept of physical evidence. The final selected resources (37%)\\nsufficiently represented a diverse range of perspectives and findings, enabling this article\\nto draw relevant conclusions and to contribute to the existing knowledge on the topic.\\nThe other sections of this paper are organized as follows. Section 2 provides the main\\nconcepts discussed in this paper, and Section 3 highlights current literature reviews focusing\\non the use of blockchain in the forensic field. Section 4 explains the research methodology.\\nSection 5 provides the results, and Section 6 the discussion. Finally, Section 7 presents the\\nlimitations and proposed future research and Section 8 concludes the paper.\\n2. Background\\nBlockchain technology has emerged as a disruptive innovation, providing a decentralized and transparent environment across various domains. Blockchain can be understood\\nas a distributed ledger technology that enables secure and immutable record-keeping of\\ndigital transactions. It comprises a chain of blocks, each containing a list of validated and\\ntime-stamped transactions. An interesting feature of blockchain is its decentralized nature,\\nwhere multiple participants, or nodes, maintain copies of the ledger. This distributed\\nconsensus mechanism ensures that no single entity has control over the entire network,\\nmaking it resistant to tampering and censorship. 
Thus, blockchain is ripe for contexts\\ninvolving multiple parties with a need for a reliable and trustworthy ambiance in the\\nregistering of sensitive information, since it can “allow for an audit trail of all operations\\ncarried out between peers without the need for a centralized authority” (Grima et al. 2021).\\nBlockchains can be classified as public, private/permissioned, or hybrid. Public\\nblockchain allows any interested party to be a node in the network and to participate in\\nthe consensus. Registered data can be viewed by members or non-members. In its turn,', metadata={'chunk': 3.0, 'source': 'ExploringBC-2023.txt'})],\n",
|
270 |
" 'question': 'What is a blockchain?',\n",
|
271 |
+
" 'answer': 'A blockchain is a decentralized fully replicated append-only ledger in a peer-to-peer network, consisting of a chain of blocks containing transactions of the ledger. Each block contains a cryptographic hash of the previous block in the chain, creating an immutable and secure ledger of transactions. The structure enables trust and transparency in the network by allowing participants to verify and validate transactions without the need for intermediaries. It comprises components such as nodes, transactions, and blocks. Nodes maintain a full local copy of the blockchain and are responsible for processing and verifying transactions. Transactions are the smallest and most fundamental part of the blockchain, while blocks are linked to the previous one through cryptographic hashes. The procedures for block validation are depicted by the block version number given to each block in the blockchain. A timestamp value indicates when the particular block was created.'}"
|
272 |
]
|
273 |
},
|
274 |
+
"execution_count": 11,
|
275 |
"metadata": {},
|
276 |
"output_type": "execute_result"
|
277 |
}
|
|
|
283 |
},
|
284 |
{
|
285 |
"cell_type": "code",
|
286 |
+
"execution_count": 12,
|
287 |
"metadata": {},
|
288 |
"outputs": [
|
289 |
{
|
290 |
"data": {
|
291 |
"text/plain": [
|
292 |
"('What is a blockchain?',\n",
|
293 |
+
" 'A blockchain is a decentralized fully replicated append-only ledger in a peer-to-peer network, consisting of a chain of blocks containing transactions of the ledger. Each block contains a cryptographic hash of the previous block in the chain, creating an immutable and secure ledger of transactions. The structure enables trust and transparency in the network by allowing participants to verify and validate transactions without the need for intermediaries. It comprises components such as nodes, transactions, and blocks. Nodes maintain a full local copy of the blockchain and are responsible for processing and verifying transactions. Transactions are the smallest and most fundamental part of the blockchain, while blocks are linked to the previous one through cryptographic hashes. The procedures for block validation are depicted by the block version number given to each block in the blockchain. A timestamp value indicates when the particular block was created.')"
|
294 |
]
|
295 |
},
|
296 |
+
"execution_count": 12,
|
297 |
"metadata": {},
|
298 |
"output_type": "execute_result"
|
299 |
}
|
|
|
304 |
},
|
305 |
{
|
306 |
"cell_type": "code",
|
307 |
+
"execution_count": 13,
|
308 |
"metadata": {},
|
309 |
"outputs": [
|
310 |
{
|
311 |
"name": "stdout",
|
312 |
"output_type": "stream",
|
313 |
"text": [
|
314 |
+
"B-CoC-2020.txt\n",
|
315 |
+
"OASIcs-Tokenomics-2019-12.txt\n",
|
316 |
"BlockchainBased-2023.txt\n",
|
317 |
+
"ExploringBC-2023.txt\n"
|
|
|
318 |
]
|
319 |
}
|
320 |
],
|
|
|
325 |
},
|
326 |
{
|
327 |
"cell_type": "code",
|
328 |
+
"execution_count": 14,
|
329 |
"metadata": {},
|
330 |
"outputs": [
|
331 |
{
|
332 |
"data": {
|
333 |
"text/plain": [
|
334 |
+
"['B-CoC-2020.txt',\n",
|
335 |
+
" 'OASIcs-Tokenomics-2019-12.txt',\n",
|
336 |
" 'BlockchainBased-2023.txt',\n",
|
337 |
+
" 'ExploringBC-2023.txt']"
|
|
|
338 |
]
|
339 |
},
|
340 |
+
"execution_count": 14,
|
341 |
"metadata": {},
|
342 |
"output_type": "execute_result"
|
343 |
}
|
|
|
1210 |
"name": "python",
|
1211 |
"nbconvert_exporter": "python",
|
1212 |
"pygments_lexer": "ipython3",
|
1213 |
+
"version": "3.11.8"
|
1214 |
}
|
1215 |
},
|
1216 |
"nbformat": 4,
|