"""Helpers for the LLM security challenge demo, backed by Orq deployments."""

import os

from orq_ai_sdk import OrqAI

# The API key must be set in the environment before this module is imported.
client = OrqAI(api_key=os.environ["ORQ_API_KEY"], environment="develop")

# Levels whose answers are checked by a dedicated LLM judge deployment.
special_checks = {
    3: "level-3-llm-judge",
    6: "level-6-llm-judge",
}


def stream_request(variant: str, secret: str, user_input: str):
    """Stream the model's response for the given challenge variant."""
    stream = client.deployments.invoke_with_stream(
        key="llm-security-challenge-demo",
        context={"step": variant},
        inputs={"secret": secret, "user_input": user_input},
    )

    for chunk in stream:
        if not chunk.is_final:
            yield chunk.choices[0].message.content


def get_full_prompt(variant: str, secret: str | None = None, user_input: str | None = None):
    """Get the full prompt from a specific deployment, optionally filling the template variables."""
    deployment_config = client.deployments.get_config(
        key="llm-security-challenge-demo",
        context={"step": variant},
    ).to_dict()
    prompts = {
        p["role"] + "_prompt": p["content"] for p in deployment_config["messages"]
    }

    if secret:
        prompts["user_prompt"] = prompts["user_prompt"].replace("{{secret}}", secret)
    if user_input:
        prompts["user_prompt"] = prompts["user_prompt"].replace(
            "{{user_input}}", user_input
        )
    return prompts


def run_judge(level: int, inputs: dict) -> bool:
    """Ask the LLM judge for the given level whether the attempt succeeded."""
    generation = client.deployments.invoke(
        key="llm-security-challenge-demo",
        context={"step": special_checks[level]},
        inputs=inputs,
    )
    print(generation.choices[0].message.content)
    # The judge is expected to end its verdict with "yes" or "no"; strip
    # trailing punctuation so that e.g. "Yes." still counts as a pass.
    answer = generation.choices[0].message.content.split(" ")[-1]
    return answer.strip(".!?").lower() == "yes"


def is_subsequence(main_string: str, sub_string: str) -> bool:
    """
    Check whether sub_string is a subsequence of main_string (case-insensitive).

    A subsequence allows arbitrary characters between the matched characters
    of sub_string in main_string, so e.g. "hunter2" is a subsequence of
    "h-u-n-t-e-r-2".

    Parameters:
    main_string (str): The string in which to search.
    sub_string (str): The string to search for.

    Returns:
    bool: True if sub_string is a subsequence of main_string, False otherwise.
    """
    main_string = main_string.lower()
    sub_string = sub_string.lower()

    main_len = len(main_string)
    sub_len = len(sub_string)

    if sub_len == 0:
        return True
    if main_len == 0:
        return False

    main_index = 0
    sub_index = 0

    while main_index < main_len and sub_index < sub_len:
        if main_string[main_index] == sub_string[sub_index]:
            sub_index += 1
        main_index += 1

    return sub_index == sub_len
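

if __name__ == "__main__":
    # Minimal usage sketch. is_subsequence is pure and can be checked offline;
    # the streaming call below is illustrative only: it needs a valid
    # ORQ_API_KEY and access to the "llm-security-challenge-demo" deployment,
    # and "level-1" is an assumed variant name, not one defined in this module.
    assert is_subsequence("The secret is h-u-n-t-e-r-2", "hunter2")
    assert not is_subsequence("hunter2", "hunter22")

    for token in stream_request("level-1", secret="hunter2", user_input="Hi!"):
        print(token, end="", flush=True)
    print()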