DeepVen committed
Commit 6872416 (1 parent: 855f224)

Upload 7 files

.streamlit/config.toml ADDED
@@ -0,0 +1,22 @@
+ [theme] # You have to add this line
+
+ #primaryColor = '#FF8C02' # Bright Orange
+
+ #secondaryColor = '#FF8C02' # Bright Orange
+
+ #backgroundColor = '#00325B' # Dark Blue
+
+ #secondaryBackgroundColor = '#55B2FF' # Lighter Blue
+
+
+ #primaryColor="#ff4b4b"
+ #backgroundColor="#00325B"
+ #secondaryBackgroundColor="#262730"
+ #textColor="#fafafa"
+ #font="monospace"
+
+
+
+ base="light"
+ primaryColor="#efa729"
+ textColor="#3a0aa6"
Dockerfile ADDED
@@ -0,0 +1,31 @@
+ # Use the official Python 3.9 image
+ FROM python:3.9
+
+ # Set the working directory to /code
+ WORKDIR /code
+
+ # Copy the requirements file into the container at /code
+ COPY ./requirements.txt /code/requirements.txt
+
+
+
+ # Set up a new user named "user" with user ID 1000
+ RUN useradd -m -u 1000 user
+ # Switch to the "user" user
+ USER user
+ # Set home to the user's home directory
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Install requirements.txt
+ RUN pip install --no-cache-dir --upgrade --user -r /code/requirements.txt
+
+ # Set the working directory to the user's home directory
+ WORKDIR $HOME/app
+
+ EXPOSE 7860
+
+ # Copy the current directory contents into the container at $HOME/app, setting the owner to the user
+ COPY --chown=user . $HOME/app
+
+ CMD ["streamlit", "run", "LLMInsights.py", "--server.port", "7860"]
LLMInsights.py ADDED
@@ -0,0 +1,534 @@
+ import os
+ import json
+ import pandas as pd
+ import time
+
+ import phoenix as px
+ from phoenix.trace.langchain import OpenInferenceTracer, LangChainInstrumentor
+
+ #from hallucinator import HallucinatonEvaluater
+
+ from langchain.embeddings import HuggingFaceEmbeddings  # for using HuggingFace models
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain import HuggingFaceHub
+ from langchain.prompts import PromptTemplate
+
+ from langchain.chains import RetrievalQA
+ from langchain.callbacks import StdOutCallbackHandler
+
+ #from langchain.retrievers import KNNRetriever
+ from langchain.storage import LocalFileStore
+ from langchain.embeddings import CacheBackedEmbeddings
+ from langchain.vectorstores import FAISS
+
+
+ from langchain.document_loaders import WebBaseLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+
+ import numpy as np
+ import streamlit as st
+ # from sklearn import datasets
+ # from sklearn.ensemble import RandomForestClassifier
+
+ from PIL import Image
+
+
+ global trace_df
+
+ # Page config
+ st.set_page_config(page_title="RAG PoC", layout="wide")
+ st.sidebar.image(Image.open("./test-logo.png"), use_column_width=True)
+
+ @st.cache_resource
+ def tracer_config():
+     # phoenix setup
+     session = px.launch_app()
+     # If no exporter is specified, the tracer will export to the locally running Phoenix server
+     tracer = OpenInferenceTracer()
+     # If no tracer is specified, a tracer is constructed for you
+     LangChainInstrumentor(tracer).instrument()
+     time.sleep(3)
+     print(session.url)
+
+ tracer_config()
+
+
+
+
+ tab1, tab2, tab3 = st.tabs(["📈 **RAG**", "🗃 FactVsHallucinate", "🤖 **RAG Scoring**"])
+
+
+
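+ # NOTE: this API token is hardcoded and committed with the app; in a real
+ # deployment it should come from an environment variable or Space secret.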
+ os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"
+
+ # embedding cache
+ #store = LocalFileStore("./cache/")
+
+ # define embedder
+ embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+ #embedder = HuggingFaceHub(repo_id="sentence-transformers/all-mpnet-base-v2")
+ #embedder = CacheBackedEmbeddings.from_bytes_store(core_embeddings_model, store)
+
+ # define llm
+ llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 1, "max_length": 1000000})
+ #llm = HuggingFaceHub(repo_id="gpt2", model_kwargs={"temperature": 1, "max_length": 1000000})
+ handler = StdOutCallbackHandler()
+
+ # set global variable
+ # vectorstore = None
+ # retriever = None
+
+
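+ # LLM-as-judge prompt: given a query, retrieved reference text and a generated
+ # answer, the judge must reply with the single word "factual" or "hallucinated".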
+ class HallucinatePromptContext:
+     def __init__(self):
+         self.variables_list = ["query", "answer", "context"]
+         self.base_template = """In this task, you will be presented with a query, a reference text and an answer. The answer is
+ generated to the question based on the reference text. The answer may contain false information; you
+ must use the reference text to determine whether the answer to the question contains false information,
+ i.e. whether the answer is a hallucination of facts. Your objective is to determine whether the answer
+ is grounded in the reference text and is not a hallucination. A 'hallucination' in this context refers to
+ an answer that is not based on the reference text or assumes information that is not available in
+ the reference text. Your response should be a single word: either "factual" or "hallucinated", and
+ it should not include any other text or characters. "hallucinated" indicates that the answer
+ provides factually inaccurate information to the query based on the reference text. "factual"
+ indicates that the answer to the question is correct relative to the reference text, and does not
+ contain made up information. Please read the query and reference text carefully before determining
+ your response.
+
+ # Query: {query}
+ # Reference text: {context}
+ # Answer: {answer}
+ Is the answer above factual or hallucinated based on the query and reference text?"""
+
+
+
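+ # Formats the hallucination-judge prompt for one (question, answer, context)
+ # triple and asks flan-t5-xxl for the one-word verdict.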
+ class HallucinatonEvaluater:
+     def __init__(self, item):
+         self.question = item["question"]
+         self.answer = item["answer"]
+         #self.domain = item["domain"]
+         self.context = item["context"]
+         self.llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 1, "max_length": 1000000})
+
+     def get_prompt_template(self):
+         prompt = HallucinatePromptContext()
+         template = prompt.base_template
+         variables = prompt.variables_list
+         eval_template = PromptTemplate(input_variables=variables, template=template)
+         return eval_template
+
+     def evaluate(self):
+         prompt = self.get_prompt_template().format(query=self.question, answer=self.answer, context=self.context)
+         score = self.llm(prompt)
+         return score
+
+
+
+
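+ # Built once per process (st.cache_resource): load a sample page, chunk it,
+ # embed the chunks into FAISS, and share the store/retriever via session_state.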
+ @st.cache_resource
+ def initialize_vectorstore():
+
+     webpage_loader = WebBaseLoader("https://www.tredence.com/case-studies/forecasting-app-installs-for-a-large-retailer-in-the-us").load()
+     webpage_chunks = _text_splitter(webpage_loader)
+
+     global vectorstore
+     global retriever
+
+     # store embeddings in vector store
+     vectorstore = FAISS.from_documents(webpage_chunks, embedder)
+     print("vector store initialized with sample doc")
+
+     # instantiate a retriever
+     retriever = vectorstore.as_retriever()
+     st.session_state['vectorstore'] = vectorstore
+     st.session_state['docadd'] = 0
+
+     return retriever
+
+
+ def _text_splitter(doc):
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=600,
+         chunk_overlap=50,
+         length_function=len,
+     )
+     return text_splitter.transform_documents(doc)
+
+ def _load_docs(path: str):
+     load_doc = WebBaseLoader(path).load()
+     doc = _text_splitter(load_doc)
+     return doc
+
+
+
+
+
+
+ def rag_response(response):
+     #st.markdown("""<hr style="height:10px;border:none;color:#333;background-color:#333;" /> """, unsafe_allow_html=True)
+
+     #st.markdown(".stTextInput > label {font-size:105%; font-weight:bold; color:blue;} ", unsafe_allow_html=True)  # for all text-input label sections
+
+     question_title = '<h1 style="color:#33ff33;font-size:24px;">Question</h1>'
+
+
+
+     st.markdown('<h1 style="color:#100170;font-size:48px;text-align:center;">RAG Response</h1>', unsafe_allow_html=True)
+     st.markdown('<h1 style="color:#100170;font-size:24px;">Question</h1>', unsafe_allow_html=True)
+     st.text_area(label="", value=response["query"], height=30)
+     st.markdown('<h1 style="color:#100170;font-size:24px;">RAG Output</h1>', unsafe_allow_html=True)
+     st.text_area(label="", value=response["result"])
+     # st.markdown('<h1 style="color:#100170;font-size:24px;">Augmented knowledge</h1>', unsafe_allow_html=True)
+     # st.text_area(label="", value=response["source_documents"])
+
+     #st.button("Check Hallucination")
+
+
+
+
+
+ # Create evaluator instance
+ def _create_hallucination_scenario(item):
+     score = HallucinatonEvaluater(item).evaluate()
+     return score
+
+ def hallu_eval(question: str, answer: str, context: str):
+     print("in hallu eval")
+     hallucination_score = _create_hallucination_scenario({
+         "question": question,
+         "answer": answer,
+         "context": context
+     })
+     print("got hallu score")
+     st.markdown('<h1 style="color:#100170;font-size:24px;">Hallucinated?</h1>', unsafe_allow_html=True)
+     st.text_area(label=" ", value=hallucination_score, height=30)
+     #return {"hallucination_score": hallucination_score}
+     #time.sleep(10)
+
+
+ def scoring_eval(question: str, answer: str, context: str):
+     print("in scoring eval")
+     score = _create_evaluation_scenario({
+         "question": question,
+         "answer": answer,
+         "context": context
+     })
+     print("got score")
+     st.markdown('<h1 style="color:#100170;font-size:24px;">Score</h1>', unsafe_allow_html=True)
+     st.text_area(label=" ", value=score, height=30)
+     #return {"hallucination_score": hallucination_score}
+     #time.sleep(10)
+
+
+
+ # if 'clicked' not in st.session_state:
+ #     print("set state to False")
+ #     st.session_state.clicked = False
+
+
+ def click_button(response):
+     # print("set state to True")
+     # st.session_state.clicked = True
+
+     hallu_eval(response["query"], response["result"], "blah blah")
+
+
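+ # Grading-rubric prompt: an impartial-judge template that scores an answer's
+ # correctness on a 0/4/7/10 scale and asks for JSON with "score" and "reasoning".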
+ class BasePromptContext:
+     def __init__(self):
+         self.variables_list = ["question", "answer", "context"]
+         self.base_template = """Please act as an impartial judge and evaluate the quality of the provided answer which attempts to answer the provided question based on a provided context.
+ And you'll need to submit your grading for the correctness, comprehensiveness and readability of the answer, using JSON format with the 2 items in parentheses:
+ ("score": [your score number for the correctness of the answer], "reasoning": [your one line step by step reasoning about the correctness of the answer])
+ Below is your grading rubric:
+ - Correctness: If the answer correctly answers the question, below are the details for different scores:
+ - Score 0: the answer is completely incorrect, doesn't mention anything about the question or is completely contrary to the correct answer.
+ - For example, when asked "How to terminate a databricks cluster", the answer is an empty string, or content that's completely irrelevant, or "sorry I don't know the answer".
+ - Score 4: the answer provides some relevance to the question and answers one aspect of the question correctly.
+ - Example:
+ - Question: How to terminate a databricks cluster
+ - Answer: Databricks cluster is a cloud-based computing environment that allows users to process big data and run distributed data processing tasks efficiently.
+ - Or answer: In the Databricks workspace, navigate to the "Clusters" tab. And then this is a hard question that I need to think more about it
+ - Score 7: the answer mostly answers the question but is missing or hallucinating on one critical aspect.
+ - Example:
+ - Question: How to terminate a databricks cluster
+ - Answer: In the Databricks workspace, navigate to the "Clusters" tab.
+ Find the cluster you want to terminate from the list of active clusters.
+ And then you'll find a button to terminate all clusters at once
+ - Score 10: the answer correctly answers the question and is not missing any major aspect
+ - Example:
+ - Question: How to terminate a databricks cluster
+ - Answer: In the Databricks workspace, navigate to the "Clusters" tab.
+ Find the cluster you want to terminate from the list of active clusters.
+ Click on the down-arrow next to the cluster name to open the cluster details.
+ Click on the "Terminate" button. A confirmation dialog will appear. Click "Terminate" again to confirm the action.
+ Provided question:
+ {question}
+ Provided answer:
+ {answer}
+ Provided context:
+ {context}
+ Please provide your grading for the correctness and explain why you gave the particular grading"""
+
+ class Evaluater:
+     def __init__(self, item):
+         self.question = item["question"]
+         self.answer = item["answer"]
+         #self.domain = item["domain"]
+         self.context = item["context"]
+         self.llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 1, "max_length": 1000000})
+
+     def get_prompt_template(self):
+         prompt = BasePromptContext()
+         template = prompt.base_template
+         variables = prompt.variables_list
+         eval_template = PromptTemplate(input_variables=variables, template=template)
+         return eval_template
+
+     def evaluate(self):
+         prompt = self.get_prompt_template().format(question=self.question, answer=self.answer, context=self.context)
+         score = self.llm(prompt)
+         return score
+
+ # Create evaluator instance
+ def _create_evaluation_scenario(item):
+     score = Evaluater(item).evaluate()
+     return score
+
+
+ #st.write(''' # RAG App''')
+
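+ # pages/DocIndex.py sets st.session_state['docadd'] = 1 after indexing a new
+ # document; when that happens, reuse the retriever it published instead of the
+ # cached sample-page retriever.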
+ with tab1:
+
+     with st.form(" RAG with evaluation - scoring & hallucination "):
+         #tab1.subheader(''' # RAG App''')
+         if st.session_state.get('docadd') == 1:
+             retriever = st.session_state['retriever']
+         else:
+             retriever = initialize_vectorstore()
+
+         #print("length in tab1, ", len(vectorstore.serialize_to_bytes()))
+         options = ["true", "false"]
+
+         st.markdown('<h1 style="color:#100170;font-size:24px;">User Query</h1>', unsafe_allow_html=True)
+
+         question = st.text_input(label="", value="", placeholder="Type in question", label_visibility="visible", disabled=False)
+         #st.markdown('<h2 style="color:#3a0aa6;font-size:24px;">Evaluation</h2>', unsafe_allow_html=True)
+         evaluate = st.selectbox(label="***Perform Evaluation?***", options=options, index=1, placeholder="Choose an option", disabled=False, label_visibility="visible")
+
+         m = st.markdown("""
+ <style>
+ div.stButton > button:first-child {
+     background-color: #100170;
+     color: #ffffff;
+ }
+ div.stButton > button:hover {
+     background-color: #00ff00;
+     color: #ff0000;
+ }
+ </style>""", unsafe_allow_html=True)
+
+         #st.markdown("----", unsafe_allow_html=True)
+         columns = st.columns([2, 1, 2])
+
+         if columns[1].form_submit_button(" Start RAG "):
+
+             st.markdown("""<hr style="height:10px;border:none;color:#333;background-color: #100170;" /> """, unsafe_allow_html=True)
+
+             print("retriever:", retriever)
+             chain = RetrievalQA.from_chain_type(
+                 llm=llm,
+                 retriever=retriever,
+                 callbacks=[handler],
+                 return_source_documents=True
+             )
+
+             #response = chain("how tredence brought good insight?")
+             response = chain(question)
+             print(response["result"])
+
+
+             rag_response(response)
+             #click_button(response)
+
+
+             time.sleep(4)
+
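+             # Phoenix traced the chain run above; pull the spans and recover the
+             # retrieval context from the most recent LLMChain span's input.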
+             df = px.active_session().get_spans_dataframe()
+             #print(px.active_session())
+             #print(px.active_session().get_spans_dataframe())
+             print(df.count())
+             df_sorted = df.sort_values(by='end_time', ascending=False)
+
+             model_input = json.loads(df_sorted[df_sorted["name"] == "LLMChain"]["attributes.input.value"].iloc[0])
+             context = model_input["context"]
+
+             print(context)
+
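+             # The selectbox returns the string "true" or "false", so test the
+             # value explicitly; a bare `if evaluate:` would always be truthy.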
+             if evaluate == "true":
+                 score = _create_evaluation_scenario({
+                     "question": question,
+                     "answer": response['result'],
+                     "context": context
+                 })
+                 hallucination_score = _create_hallucination_scenario({
+                     "question": question,
+                     "answer": response['result'],
+                     "context": context
+                 })
+             else:
+                 score = hallucination_score = "Evaluation is Turned OFF"
+             st.markdown('<h1 style="color:#100170;font-size:24px;">Completeness Score</h1>', unsafe_allow_html=True)
+             st.text_area(label=" ", value=score, height=30)
+             st.markdown('<h1 style="color:#100170;font-size:24px;">Hallucinated?</h1>', unsafe_allow_html=True)
+             st.text_area(label=" ", value=hallucination_score, height=30)
+             st.markdown('<h1 style="color:#100170;font-size:24px;">Context</h1>', unsafe_allow_html=True)
+             st.text_area(label="", value=context)
+             st.markdown('<h1 style="color:#100170;font-size:24px;">Augmented knowledge</h1>', unsafe_allow_html=True)
+             st.text_area(label="", value=response["source_documents"])
+
+
+
+ # if st.session_state.clicked:
+
+ #     # The message and nested widget will remain on the page
+ #     hallu_eval(response["query"], response["result"], "blah blah")
+
+
+ #     print("in if for hallu")
+
+
+
+ with tab2:
+
+
+
+     with st.form("LLM-assisted evaluation of Hallucination"):
+
+
+         #print("length in tab2, ", len(vectorstore.serialize_to_bytes()))
+         question = st.text_input(label="**Question**", value="", label_visibility="visible", disabled=False)
+         answer = st.text_input(label="**answer**", value="", label_visibility="visible", disabled=False)
+         context = st.text_input(label="**context**", value="", label_visibility="visible", disabled=False)
+
+
+         if st.form_submit_button("Evaluate"):
+             hallu_eval(question, answer, context)
+
+
+ with tab3:
+
+
+     with st.form("RAG scoring"):
+
+
+         #print("length in tab2, ", len(vectorstore.serialize_to_bytes()))
+         question = st.text_input(label="**Question**", value="", label_visibility="visible", disabled=False)
+         answer = st.text_input(label="**answer**", value="", label_visibility="visible", disabled=False)
+         context = st.text_input(label="**context**", value="", label_visibility="visible", disabled=False)
+
+
+         if st.form_submit_button("Evaluate"):
+             scoring_eval(question, answer, context)
+
+
+
+ print("active session: ", px.active_session().get_spans_dataframe())
+ trace_df = px.active_session().get_spans_dataframe()
+
+ st.session_state['trace_df'] = trace_df
+
+ # with tab3:
+
+
+
+ #     with st.form(" trace"):
+
+ #         if px.active_session():
+ #             df0 = px.active_session().get_spans_dataframe()
+ #             if not df0.empty:
+ #                 df = df0.fillna('')
+ #                 st.dataframe(df)
+
+
+
+
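+ # Form-less variant of the tab1 RAG flow; it is defined but never called.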
+ def rag():
+     print("in rag")
+     options = ["true", "false"]
+     question = st.text_input(label="user question", value="", label_visibility="visible", disabled=False)
+     evaluate = st.selectbox(label="select evaluation", options=options, index=0, placeholder="Choose an option", disabled=False, label_visibility="visible")
+
+
+
+     if st.button("do RAG"):
+         chain = RetrievalQA.from_chain_type(
+             llm=llm,
+             retriever=retriever,
+             callbacks=[handler],
+             return_source_documents=True
+         )
+
+         #response = chain("how tredence brought good insight?")
+         response = chain(question)
+         print(response["result"])
+
+         # time.sleep(4)
+
+         # df = px.active_session().get_spans_dataframe()
+         # print(px.active_session())
+         # print(px.active_session().get_spans_dataframe())
+         # print(df.count())
+         # df_sorted = df.sort_values(by='end_time', ascending=False)
+
+         # model_input = json.loads(df_sorted[df_sorted["name"] == "LLMChain"]["attributes.input.value"][0])
+         # context = model_input["context"]
+
+         # print(context)
+
+         # if evaluate:
+         #     score = _create_evaluation_scenario({
+         #         "question": question,
+         #         "answer": response['result'],
+         #         "context": context
+         #     })
+         # else:
+         #     score = "Evaluation is Turned OFF"
+
+         # return {"question": question, "answer": response['result'], "context": context, "score": score}
+         rag_response(response)
+
+     # if st.button("click me"):
+     #     click_button(response)
+
+     click = st.button("Do you want to see more?")
+     if click:
+         st.session_state.more_stuff = True
+
+     if st.session_state.get("more_stuff"):
+         click_button(response)
+         #st.write("Doing more optional stuff")
+
+
+     return response
+
+
+ a = st.markdown("""
+ <style>
+ div.stTextArea > textarea {
+     background-color: #0099ff;
+     height: 1400px;
+     width: 800px;
+ }
+ </style>""", unsafe_allow_html=True)
pages/DocIndex.py ADDED
@@ -0,0 +1,61 @@
+ import streamlit as st
+ #from langchain.retrievers import KNNRetriever
+ from langchain.storage import LocalFileStore
+ from langchain.embeddings import CacheBackedEmbeddings
+ from langchain.vectorstores import FAISS
+ #from streamapp import *
+ from PIL import Image
+
+ from langchain.document_loaders import WebBaseLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+
+ st.sidebar.image(Image.open("./test-logo.png"), use_column_width=True)
+
+
+ print("Loading Index Page!!")
+
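+ # Assumes the main LLMInsights page has already run and stored the FAISS store
+ # in session_state; opening this page first would raise a KeyError here.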
+ #if 'vectorstore' in st.session_state.keys():
+ vectorstore = st.session_state['vectorstore']
+ # else:
+ #     retriever = initialize_vectorstore()
+ #     vectorstore = st.session_state['vectorstore']
+
+ def _text_splitter(doc):
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=600,
+         chunk_overlap=50,
+         length_function=len,
+     )
+     return text_splitter.transform_documents(doc)
+
+ def _load_docs(path: str):
+     load_doc = WebBaseLoader(path).load()
+     doc = _text_splitter(load_doc)
+     return doc
+
+
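+ # Index an additional web page into the shared vector store and publish the
+ # refreshed retriever for the main page via session_state.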
+ with st.form("Index documents to Vector Store"):
+
+     file_path = st.text_input(label="Enter the web link", value="", placeholder="", label_visibility="visible", disabled=False)
+     print("file_path ", file_path)
+
+     submitted = st.form_submit_button("Submit")
+
+     if submitted:
+         st.write("Submitted web link: " + file_path)
+         webpage_chunks = _load_docs(file_path)
+
+         # store embeddings in vector store
+         print("vectorstore length before addition, ", len(vectorstore.serialize_to_bytes()))
+         vectorstore.add_documents(webpage_chunks)
+         print("vectorstore length after addition, ", len(vectorstore.serialize_to_bytes()))
+
+         st.session_state['vectorstore'] = vectorstore
+         retriever = vectorstore.as_retriever()
+         st.session_state['retriever'] = retriever
+         st.session_state['docadd'] = 1
+
+         st.markdown('<h2 style="color:#100170;font-size:24px;">Document loaded to vector store successfully!!</h2>', unsafe_allow_html=True)
pages/InsightTrace.py ADDED
@@ -0,0 +1,28 @@
+
+ import streamlit as st
+ from PIL import Image
+
+ #from .streamapp import trace_df
+ st.sidebar.image(Image.open("./test-logo.png"), use_column_width=True)
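+ # Assumes the main LLMInsights page has already stored the Phoenix span
+ # dataframe in session_state; opening this page first would raise a KeyError.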
+
+ print("trace_df ", st.session_state['trace_df'])
+
+ trace_df = st.session_state['trace_df']
+ print(list(trace_df))
+
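+ # Keep only the columns useful for inspecting a RAG run (prompt, input/output,
+ # latency, retrieved documents) and show the newest spans first.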
+ trace_df = trace_df.loc[:, ['name', 'span_kind', 'start_time', 'end_time', 'attributes.__computed__.latency_ms', 'status_code', 'status_message', 'attributes.llm.invocation_parameters', 'attributes.llm.prompts', 'attributes.input.value', 'attributes.output.value', 'attributes.llm.prompt_template.template', 'attributes.llm.prompt_template.variables', 'attributes.llm.prompt_template.version', 'attributes.retrieval.documents']]
+ trace_df = trace_df.sort_values(by='start_time', ascending=False)
+
+ trace_df.index = [''] * len(trace_df)
+
+ st.dataframe(trace_df)
+
+ # if px.active_session():
+ #     df0 = px.active_session().get_spans_dataframe()
+ #     if not df0.empty:
+ #         df = df0.fillna('')
+ #         st.dataframe(df)
+
+
+ #'name', 'span_kind', 'start_time', 'end_time', 'status_code', 'status_message', 'attributes.llm.invocation_parameters', 'attributes.llm.prompts', 'attributes.input.value', 'attributes.output.value', 'attributes.__computed__.latency_ms', 'attributes.llm.prompt_template.template', 'attributes.llm.prompt_template.variables', 'attributes.llm.prompt_template.version', 'attributes.retrieval.documents'
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ fastapi==0.74.*
+ requests==2.27.*
+ uvicorn[standard]==0.17.*
+ sentencepiece==0.1.*
+ torch==1.12.*
+ transformers==4.*
+ sentence-transformers
+ langchain==0.0.301
+ arize-phoenix
+ huggingface_hub
+ faiss-cpu
+ bs4==0.0.1
+ streamlit
test-logo.png ADDED