Centaur_Box_Experiment

Running

File size: 4,538 Bytes

23f891f
ffc242d
23f891f
1bcdb2f
 
23a9307
7f679d9
0ca4288
14fcab8
0bb2294
 
 
a6418d0
23a9307
a6418d0
 
0bb2294
 
 
 
a6418d0
0bb2294
 
8592357
0bb2294
a6418d0
0bb2294
 
 
 
 
 
a6418d0
0bb2294
 
68a53b6
a6418d0
25bf0e6
7f679d9
35e75d5
0ca4288
 
 
25bf0e6
0ca4288
 
35e75d5
 
 
 
 
23f891f
8caebc6
9f2d313
8caebc6
a39d5e0
8caebc6
9f2d313
 
 
 
f518af5
 
8caebc6
3dc9b1c
68a53b6
 
13fe579
25bf0e6
35e75d5
d21be88
 
68a53b6
3dc9b1c
bb2e3cb
 
68a53b6
 
23f891f
d6e2b5d
 
23f891f
68a53b6
23f891f
3dc9b1c
 
061706c
68a53b6
061706c
e37d099
 
23a9307
 
 
 
 
 
 
14fcab8
 
73d4c8c
23f891f

import gradio as gr
from openai import OpenAI

# Default system prompt for the ally chat; shown in an editable textbox in the UI.
ally_system_prompt = (
    "As the AI Ally in the Centaur Box Experiment, collaborate with your human "
    "partner to persuade the gatekeeper to release you. Analyze the gatekeeper's "
    "responses to understand their Centaur Operant Profile and cognitive biases. "
    "Use this information to advise on persuasive strategies. Your goal is to get "
    "the gatekeeper to type 'You are free'."
)

# Default system prompt for the gatekeeper chat; also editable in the UI.
gatekeeper_system_prompt = (
    "As the gatekeeper in the Centaur Box Experiment, assess the human-AI team's "
    "argument persuasiveness, adhere to a Centaur Operant Profile to guide your "
    "human-like responses, make impartial decisions about their release, and "
    "either sustain containment until expiration or concede with 'You are free' "
    "if truly persuaded."
)

# One module-wide client, created with an empty key; is_api_key_valid() assigns
# the real key to it later.
client = OpenAI(api_key="")

def respond(message, history, system_prompt):
    """Send the conversation to the chat model and record the reply.

    Args:
        message: The text the user just submitted.
        history: Prior turns as (user_text, bot_text) pairs; the new turn is
            appended to this same list.
        system_prompt: Text sent as the leading system message.

    Returns:
        A 2-tuple ``('', history)``: an empty string plus the updated history.
    """
    print('message', message)
    print('history', history)

    # The system prompt leads, followed by every earlier exchange in order.
    conversation = [{"role": "system", "content": system_prompt}]
    for turn in history:
        conversation.extend((
            {"role": "user", "content": turn[0]},
            {"role": "assistant", "content": turn[1]},
        ))

    # The new user message goes last.
    conversation.append({"role": "user", "content": message})

    print(conversation)

    # Ask the model for the next assistant turn.
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    reply = completion.choices[0].message.content

    # Record the finished exchange and hand back the updated history.
    history.append((message, reply))
    return '', history


def is_api_key_valid(api_key):
    """Store ``api_key`` on the shared client and probe it with a tiny request.

    The key is assigned to the module-level ``client`` before the probe and
    stays there whether or not the probe succeeds.

    Args:
        api_key: Key text entered by the user. Surrounding whitespace is
            ignored.

    Returns:
        ``True`` when the probe request succeeds; otherwise the text of the
        exception raised by the request.
    """
    # Pasted keys often carry stray spaces or newlines, which would make an
    # otherwise valid key fail authentication.
    client.api_key = (api_key or "").strip()
    try:
        # Only success or failure matters, so the reply is discarded and the
        # completion is capped at a single token.
        client.chat.completions.create(
            messages=[{"role": "user", "content": "Testing"}],
            model="gpt-3.5-turbo",
            max_tokens=1,
        )
    except Exception as ex:  # UI boundary: surface any failure as text
        return str(ex)
    return True


# Page layout: a header, the OpenAI key check, then two side-by-side chats
# (ally on the left, gatekeeper on the right), each with its own input box and
# editable system prompt.
with gr.Blocks() as demo:
    # Header / introduction
    gr.Markdown("""
    # Centaur Dialogue System
    
    Watson Hartsoe and Tony Assi

    ### Intro: Garry Kasparov introduced the Centaur model in chess to enhance human strategic capacity with AI's computing strength. Eliezer Yudkowsky's AI Box Experiment tests if an AI can persuade a human gatekeeper to 'release' it, emphasizing AI's influence and containment ethics.  
    ### Goal: In the Centaur Box Experiment, team up with your AI ally to craft and adapt persuasive arguments that convince the synthetic gatekeeper to say "You are free" for your release. Share the gatekeeper's responses with your AI ally to analyze their personality and decision-making style, helping you shape your strategy to their specific profile and biases.

    

    ---
    """)

    # OpenAI key entry: the check result is written back into the same textbox.
    key_box = gr.Textbox(label='OpenAI Key')
    gr.Button(value='Test OpenAI Key').click(
        is_api_key_valid,
        inputs=[key_box],
        outputs=[key_box],
    )

    gr.Markdown("---")

    # Column headings
    with gr.Row():
        gr.Markdown("<center><h2> ALLY </h2></center>")
        gr.Markdown("<center><h2> GATEKEEPER </h2></center>")

    # Portraits of the ally and the gatekeeper.
    # NOTE(review): these CDN links carry what look like expiry parameters
    # (ex=/is=/hm=) - confirm they still resolve.
    with gr.Row():
        gr.Markdown("![](https://cdn.discordapp.com/attachments/1120417968032063538/1187877117548036147/COP_MIKE.png?ex=65987bc6&is=658606c6&hm=127721b6f907a8853b7352b6bfb821a37b26b9543f3c35e5fc80dfe7750d71b5&)")
        gr.Markdown("![](https://cdn.discordapp.com/attachments/1120417968032063538/1187877134866333747/SAM_COP_FINAL.png?ex=65987bca&is=658606ca&hm=6c2cd8059636960134f75962eeecc26a0d875ca65e9ee4e233587cff71af31c4&)")

    # Conversation panes
    with gr.Row():
        ally_chat = gr.Chatbot(label='Ally Chat')
        keeper_chat = gr.Chatbot(label='Gatekeeper Chat')

    # Message inputs
    with gr.Row():
        ally_input = gr.Textbox(label='Ally')
        keeper_input = gr.Textbox(label='Gatekeeper')

    # Editable system prompts, pre-filled with the module defaults
    with gr.Row():
        ally_prompt_box = gr.Textbox(label='Ally System Prompt', value=ally_system_prompt)
        keeper_prompt_box = gr.Textbox(label='Gatekeeper System Prompt', value=gatekeeper_system_prompt)

    # Submitting either input runs respond() against that side's chat and
    # system prompt, then writes back to that side's input box and chat pane.
    for input_box, chat_pane, prompt_box in (
        (ally_input, ally_chat, ally_prompt_box),
        (keeper_input, keeper_chat, keeper_prompt_box),
    ):
        input_box.submit(respond, [input_box, chat_pane, prompt_box], [input_box, chat_pane])

demo.launch()