# Run with: chainlit run app.py -w
# The Chainlit Python streaming pattern is documented here: https://docs.chainlit.io/concepts/streaming/python
# OpenAI Chat completion
from dotenv import load_dotenv
load_dotenv()

import os
import sys
import getpass
# import nest_asyncio
# import pandas as pd
import faiss
import openai

import chainlit as cl  # importing chainlit for our app
# https://docs.chainlit.io/api-reference/step-class#update-a-step
# DEPRECATED: from chainlit.prompt import Prompt, PromptMessage  # importing prompt tools

import llama_index
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core import set_global_handler
from llama_index.core.node_parser import MarkdownElementNodeParser
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
# from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker
from llama_parse import LlamaParse

from openai import AsyncOpenAI  # importing openai for API usage

# The following line may be needed to run locally; without it, the process grabs the GPU cards on my PC.
# os.environ["CUDA_VISIBLE_DEVICES"] = ""

# GET KEYS
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
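# A minimal .env for load_dotenv() to pick up might look like this
# (sketch; the placeholder values are illustrative):
# OPENAI_API_KEY=sk-...
# LLAMA_CLOUD_API_KEY=llx-...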

"""
os.environ["LLAMA_CLOUD_API_KEY"] = getpass.getpass("LLamaParse API Key:")
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
# os.environ["WANDB_API_KEY"] = getpass.getpass("WandB API Key: ")
"""

# nest_asyncio.apply()  # not needed for the app

# PARSING the PDF file with LlamaParse
parser = LlamaParse(
    result_type="markdown",
    verbose=True,
    language="en",
    num_workers=2,
)

nvidia_docs = parser.load_data(["./nvidia_2tables.pdf"])
# Note: nvidia_docs contains only one file (it could contain more); nvidia_docs[0] is the PDF we loaded.
# print(nvidia_docs[0].text[:1000])

# Settings comes from llama_index.core; it is a major part of the v0.10 update!
Settings.llm = OpenAI(model="gpt-3.5-turbo")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Use MarkdownElementNodeParser to make sense of our Markdown objects, so we can
# leverage the potentially structured information (e.g. tables) in the parsed documents.

# Unclear whether the following is needed, as I do not know if the parse produced Markdown objects.
node_parser = MarkdownElementNodeParser(llm=OpenAI(model="gpt-3.5-turbo"), num_workers=8)
nodes = node_parser.get_nodes_from_documents(documents=[nvidia_docs[0]])

"""
# Let's see what's in the metadata of the nodes:
for nd in nodes:
  print(nd.metadata)
  for k,v in nd:
    if k=='table_df':
      print(nd)
"""
# Now we extract our `base_nodes` and `objects` to create the `VectorStoreIndex`.
base_nodes, objects = node_parser.get_nodes_and_objects(nodes)
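# Quick sanity check (sketch): `base_nodes` holds the plain text chunks, while
# `objects` holds the IndexNode wrappers the parser builds for elements such as tables.
# print(f"{len(base_nodes)} base nodes, {len(objects)} object nodes")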

# We could use the default in-memory VectorStoreIndex from llama_index.core,
# or the FAISS-backed store from llama-index-vector-stores-faiss.
# Here we use FAISS and set the dimension of its vectors.

faiss_dim = 1536  # must match the embedding model's output size (text-embedding-3-small produces 1536-dim vectors)
faiss_index = faiss.IndexFlatL2(faiss_dim)  # exact (brute-force) L2 index; each add() appends new vectors

# Creating the FaissVectorStore and its recursive_index_faiss
llama_faiss_vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=llama_faiss_vector_store)
recursive_index_faiss = VectorStoreIndex(nodes=base_nodes+objects, storage_context=storage_context)
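# Optional persistence sketch (the persist_dir path is an assumption): persisting
# the storage context would avoid re-parsing and re-embedding the PDF on every
# app start, at the cost of staleness if the source PDF changes.
# recursive_index_faiss.storage_context.persist(persist_dir="./storage_nvidia")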

# Now we can build our Recursive Query Engine, optionally with reranking.

# That would take a couple of steps:
# 1. Initialize our reranker using `FlagEmbeddingReranker` powered by `BAAI/bge-reranker-large`.
# 2. Set up our recursive query engine!

# We skip the reranker here to see whether that keeps the app from timing out on Hugging Face.
# reranker = FlagEmbeddingReranker(
#     top_n=5,
#     model="BAAI/bge-reranker-large",
# )

recursive_query_engine = recursive_index_faiss.as_query_engine(
    similarity_top_k=5,
    # we will not post-process the answer with the reranker: it takes too long...
    # node_postprocessors=[reranker],
    verbose=True
)
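# Local smoke test (sketch; the question is the example used further below).
# Running this once outside Chainlit verifies that parsing, indexing, and
# retrieval all work end to end:
# print(recursive_query_engine.query("Who are the E-VP, Operations - and how old are they?"))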

"""
# Create pandas dataframe to store query+generated response+added truth
columns=["Query", "Response", "Truth"]
gen_df = pd.DataFrame(columns=columns, dtype='str')
"""

# ChatOpenAI Templates
system_template = """Use the following pieces of context to answer the user's question.
If you don't know the answer, say that you don't know, do not try to make up an answer. 
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source inside the document from which you got your answer.
You are a helpful assistant who always speaks in a pleasant tone! """

user_template = """ Think through your response step by step."""

#user_query = "Who are the E-VP, Operations - and how old are they?"

""" test function 
def retriever_resp(prompt):
    import time
    response = "this is my response"
    time.sleep(5)
    return response
"""

@cl.on_message  # marks a function that should be run each time the chatbot receives a message from a user
async def main(message: cl.Message):
    settings = cl.user_session.get("settings")  # set in on_chat_start above; currently unused

    # user_query is populated from what the user types
    user_query = message.content
    # Add instructions before and after the user query which will not show in the app.
    prompt = f"{system_template}\n\n{user_query}\n\n{user_template}"

    response = recursive_query_engine.query(prompt)
    str_resp ="{}".format(response)
    msg = cl.Message(content= str_resp)
    await msg.send()
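
# Streaming variant (sketch, per the Chainlit streaming docs linked at the top).
# llama_index query engines can stream when built with as_query_engine(streaming=True),
# exposing tokens via response_gen; this app currently sends the answer in one message.
# streaming_engine = recursive_index_faiss.as_query_engine(
#     similarity_top_k=5, streaming=True, verbose=True
# )
# streaming_response = streaming_engine.query(prompt)
# msg = cl.Message(content="")
# for token in streaming_response.response_gen:
#     await msg.stream_token(token)
# await msg.send()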