geekyrakshit committed on
Commit
2946856
·
1 Parent(s): b077b7d

update: app

Browse files
app.py CHANGED
@@ -2,6 +2,8 @@ import streamlit as st
2
  import weave
3
  from dotenv import load_dotenv
4
 
 
 
5
  from guardrails_genie.llm import OpenAIModel
6
 
7
  load_dotenv()
@@ -10,6 +12,16 @@ weave.init(project_name="guardrails-genie")
10
  openai_model = st.sidebar.selectbox("OpenAI LLM", ["", "gpt-4o-mini", "gpt-4o"])
11
  chat_condition = openai_model != ""
12
 
 
 
 
 
 
 
 
 
 
 
13
  # Use session state to track if the chat has started
14
  if "chat_started" not in st.session_state:
15
  st.session_state.chat_started = False
@@ -40,13 +52,23 @@ if st.session_state.chat_started:
40
  # Add user message to chat history
41
  st.session_state.messages.append({"role": "user", "content": prompt})
42
 
43
- response, call = llm_model.predict.call(
44
- llm_model, user_prompts=prompt, messages=st.session_state.messages
45
  )
46
- response = response.choices[0].message.content
47
 
48
- # Display assistant response in chat message container
49
- with st.chat_message("assistant"):
50
- st.markdown(response + f"\n\n---\n[Explore in Weave]({call.ui_url})")
51
- # Add assistant response to chat history
52
- st.session_state.messages.append({"role": "assistant", "content": response})
 
 
 
 
 
 
 
 
 
 
 
 
2
  import weave
3
  from dotenv import load_dotenv
4
 
5
+ from guardrails_genie.guardrails import GuardrailManager
6
+ from guardrails_genie.guardrails.injection import SurveyGuardrail
7
  from guardrails_genie.llm import OpenAIModel
8
 
9
  load_dotenv()
 
12
  openai_model = st.sidebar.selectbox("OpenAI LLM", ["", "gpt-4o-mini", "gpt-4o"])
13
  chat_condition = openai_model != ""
14
 
15
+ guardrails = []
16
+
17
+ with st.sidebar.expander("Switch on Guardrails"):
18
+ is_survey_guardrail_enabled = st.toggle("Survey Guardrail", value=True)
19
+
20
+ if is_survey_guardrail_enabled:
21
+ guardrails.append(SurveyGuardrail(llm_model=OpenAIModel(model_name="gpt-4o")))
22
+
23
+ guardrails_manager = GuardrailManager(guardrails=guardrails)
24
+
25
  # Use session state to track if the chat has started
26
  if "chat_started" not in st.session_state:
27
  st.session_state.chat_started = False
 
52
  # Add user message to chat history
53
  st.session_state.messages.append({"role": "user", "content": prompt})
54
 
55
+ guardrails_response, call = guardrails_manager.guard.call(
56
+ guardrails_manager, prompt=prompt
57
  )
 
58
 
59
+ if guardrails_response["safe"]:
60
+ response, call = llm_model.predict.call(
61
+ llm_model, user_prompts=prompt, messages=st.session_state.messages
62
+ )
63
+ response = response.choices[0].message.content
64
+
65
+ # Display assistant response in chat message container
66
+ with st.chat_message("assistant"):
67
+ st.markdown(response + f"\n\n---\n[Explore in Weave]({call.ui_url})")
68
+ # Add assistant response to chat history
69
+ st.session_state.messages.append({"role": "assistant", "content": response})
70
+ else:
71
+ st.error("Guardrails detected an issue with the prompt.")
72
+ for alert in guardrails_response["alerts"]:
73
+ st.error(f"{alert['guardrail_name']}: {alert['response']}")
74
+ st.error(f"For details, explore in Weave at {call.ui_url}")
guardrails_genie/guardrails/injection/survey_guardrail.py CHANGED
@@ -74,4 +74,4 @@ Here are some strict instructions that you must follow:
74
  @weave.op()
75
  def guard(self, prompt: str, **kwargs) -> list[str]:
76
  response = self.predict(prompt, **kwargs)
77
- return {"verdict": response.injection_prompt}
 
74
  @weave.op()
75
  def guard(self, prompt: str, **kwargs) -> list[str]:
76
  response = self.predict(prompt, **kwargs)
77
+ return {"safe": not response.injection_prompt}
guardrails_genie/guardrails/manager.py CHANGED
@@ -1,4 +1,5 @@
1
  import weave
 
2
  from weave.flow.obj import Object as WeaveObject
3
 
4
  from .base import Guardrail
@@ -9,8 +10,11 @@ class GuardrailManager(WeaveObject):
9
 
10
  @weave.op()
11
  def guard(self, prompt: str, **kwargs) -> dict:
12
- alerts = []
13
- for guardrail in self.guardrails:
14
  response = guardrail.guard(prompt, **kwargs)
15
- alerts.append({guardrail.name: response})
16
- return alerts
 
 
 
 
1
  import weave
2
+ from rich.progress import track
3
  from weave.flow.obj import Object as WeaveObject
4
 
5
  from .base import Guardrail
 
10
 
11
  @weave.op()
12
  def guard(self, prompt: str, **kwargs) -> dict:
13
+ alerts, safe = [], True
14
+ for guardrail in track(self.guardrails, description="Running guardrails"):
15
  response = guardrail.guard(prompt, **kwargs)
16
+ alerts.append(
17
+ {"guardrail_name": guardrail.__class__.__name__, "response": response}
18
+ )
19
+ safe = safe and response["safe"]
20
+ return {"safe": safe, "alerts": alerts}