geekyrakshit committed
Commit: 3146d66
Parent: 18b8750

update: evaluation page

Files changed (1):
  application_pages/evaluation_app.py (+82 −10)
application_pages/evaluation_app.py CHANGED
@@ -1,21 +1,58 @@
+import asyncio
+from importlib import import_module
+
 import pandas as pd
 import streamlit as st
 import weave
 from dotenv import load_dotenv

+from guardrails_genie.llm import OpenAIModel
+from guardrails_genie.metrics import AccuracyMetric
+
 load_dotenv()
 weave.init(project_name="guardrails-genie")

-st.title(":material/monitoring: Evaluation")

-if "uploaded_file" not in st.session_state:
-    st.session_state.uploaded_file = None
-if "dataset_name" not in st.session_state:
-    st.session_state.dataset_name = ""
-if "visualize_in_app" not in st.session_state:
-    st.session_state.visualize_in_app = False
-if "dataset_ref" not in st.session_state:
-    st.session_state.dataset_ref = None
+def initialize_session_state():
+    if "uploaded_file" not in st.session_state:
+        st.session_state.uploaded_file = None
+    if "dataset_name" not in st.session_state:
+        st.session_state.dataset_name = ""
+    if "visualize_in_app" not in st.session_state:
+        st.session_state.visualize_in_app = False
+    if "dataset_ref" not in st.session_state:
+        st.session_state.dataset_ref = None
+    if "dataset_previewed" not in st.session_state:
+        st.session_state.dataset_previewed = False
+    if "guardrail_name" not in st.session_state:
+        st.session_state.guardrail_name = ""
+    if "guardrail" not in st.session_state:
+        st.session_state.guardrail = None
+    if "start_evaluation" not in st.session_state:
+        st.session_state.start_evaluation = False
+    if "evaluation_summary" not in st.session_state:
+        st.session_state.evaluation_summary = None
+
+
+def initialize_guardrail():
+    if st.session_state.guardrail_name == "PromptInjectionSurveyGuardrail":
+        survey_guardrail_model = st.sidebar.selectbox(
+            "Survey Guardrail LLM", ["", "gpt-4o-mini", "gpt-4o"]
+        )
+        if survey_guardrail_model:
+            st.session_state.guardrail = getattr(
+                import_module("guardrails_genie.guardrails"),
+                st.session_state.guardrail_name,
+            )(llm_model=OpenAIModel(model_name=survey_guardrail_model))
+    else:
+        st.session_state.guardrail = getattr(
+            import_module("guardrails_genie.guardrails"),
+            st.session_state.guardrail_name,
+        )()
+
+
+initialize_session_state()
+st.title(":material/monitoring: Evaluation")

 uploaded_file = st.sidebar.file_uploader(
     "Upload the evaluation dataset as a CSV file", type="csv"
@@ -27,7 +64,7 @@ visualize_in_app = st.sidebar.toggle("Visualize in app", value=False)
 st.session_state.visualize_in_app = visualize_in_app

 if st.session_state.uploaded_file is not None and st.session_state.dataset_name != "":
-    with st.expander("Evaluation Dataset Preview"):
+    with st.expander("Evaluation Dataset Preview", expanded=True):
         dataframe = pd.read_csv(st.session_state.uploaded_file)
         data_list = dataframe.to_dict(orient="records")
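The unchanged lines between this hunk and the next (old lines 32–43, not shown) are presumably where `data_list` is published and `st.session_state.dataset_ref` is set. With the public Weave API, that step typically looks like the sketch below; the dataset name is a placeholder, not necessarily what the app uses:

```python
import weave

# Illustrative only: publish the parsed CSV rows (data_list comes from
# dataframe.to_dict(orient="records") above) as a versioned Weave dataset
# and keep the returned ref for the later weave.Evaluation call.
dataset = weave.Dataset(name="evaluation-dataset", rows=data_list)
dataset_ref = weave.publish(dataset)
```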
 
@@ -44,3 +81,38 @@ if st.session_state.uploaded_file is not None and st.session_state.dataset_name != "":

         if visualize_in_app:
             st.dataframe(dataframe)
+
+    st.session_state.dataset_previewed = True
+
+if st.session_state.dataset_previewed:
+    guardrail_name = st.sidebar.selectbox(
+        "Select Guardrail",
+        options=[""]
+        + [
+            cls_name
+            for cls_name, cls_obj in vars(
+                import_module("guardrails_genie.guardrails")
+            ).items()
+            if isinstance(cls_obj, type) and cls_name != "GuardrailManager"
+        ],
+    )
+    st.session_state.guardrail_name = guardrail_name
+
+    if st.session_state.guardrail_name != "":
+        initialize_guardrail()
+        if st.session_state.guardrail is not None:
+            if st.sidebar.button("Start Evaluation"):
+                st.session_state.start_evaluation = True
+            if st.session_state.start_evaluation:
+                evaluation = weave.Evaluation(
+                    dataset=st.session_state.dataset_ref,
+                    scorers=[AccuracyMetric()],
+                    streamlit_mode=True,
+                )
+                with st.expander("Evaluation Results", expanded=True):
+                    evaluation_summary = asyncio.run(
+                        evaluation.evaluate(st.session_state.guardrail)
+                    )
+                    st.write(evaluation_summary)
+                    st.session_state.evaluation_summary = evaluation_summary
+                st.session_state.start_evaluation = False
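Two notes on this last hunk: `streamlit_mode` is not a keyword of the public `weave.Evaluation` constructor, so it presumably relies on a project-local patch or subclass; and `Evaluation.evaluate` is a coroutine, which is why it is driven with `asyncio.run` inside the expander. For reference, a minimal sketch of the same run against the public API, with a stand-in scorer (the dataset column name and output shape are guesses, not the real `AccuracyMetric` contract):

```python
import asyncio

import weave


# Stand-in scorer: weave.Evaluation also accepts plain @weave.op functions
# whose parameters are dataset columns plus the model's output. "label" and
# the {"safe": ...} output shape are assumptions for illustration.
@weave.op()
def accuracy(label: int, output: dict) -> dict:
    return {"correct": bool(output.get("safe")) == bool(label)}


async def run_eval(dataset_ref, guardrail):
    # guardrail is expected to be a weave.Model (or op) as in the app above.
    evaluation = weave.Evaluation(dataset=dataset_ref, scorers=[accuracy])
    return await evaluation.evaluate(guardrail)


# summary = asyncio.run(run_eval(dataset_ref, guardrail))
```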