Spaces:

DronA23
/

Dual_Agent_Simulation

Sleeping

App Files Files Community

Dual_Agent_Simulation / app.py

DronA23

Update app.py

2c20e27 verified 11 months ago

raw

history blame contribute delete

6.97 kB

	import streamlit as st
	import pandas as pd
	import os
	from huggingface_hub import InferenceClient

	# Initialize HF Inference client
	@st.cache_resource
	def get_client():
	    """Build the Hugging Face ``InferenceClient`` used for the LLM calls.

	    The token is looked up in the ``HF_TOKEN`` environment variable first and
	    in ``st.secrets["HF_TOKEN"]`` second. If neither lookup yields a non-empty
	    token, an error is shown in the app and the script run is halted with
	    ``st.stop()``.

	    Returns:
	        InferenceClient: a client constructed with the resolved token.
	    """
	    # Try to get token from environment variables (works in Hugging Face Spaces)
	    hf_token = os.environ.get("HF_TOKEN")
	    if not hf_token:
	        try:
	            hf_token = st.secrets["HF_TOKEN"]  # Fallback to Streamlit secrets
	        except Exception:
	            # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
	            # still propagate; any lookup failure just means "no token".
	            hf_token = None

	    # Covers both a failed lookup and a secret that is present but empty.
	    if not hf_token:
	        st.error("HF_TOKEN not found. Please set it in your Space's settings or secrets.toml")
	        st.stop()

	    return InferenceClient(token=hf_token)

	# Module-level client used by call_llama below. get_client is wrapped in
	# st.cache_resource, so this line runs on every script execution but goes
	# through that cache.
	client = get_client()

	# Define the LLM call function
	def call_llama(prompt, model="mistralai/Mistral-7B-Instruct-v0.2", max_tokens=500, temperature=0.7):
	    """Send *prompt* to a hosted text-generation model and return its output.

	    Despite the function name, the default model is Mistral-7B-Instruct.

	    Args:
	        prompt: text handed to ``client.text_generation`` as the prompt.
	        model: model identifier forwarded to the client.
	        max_tokens: forwarded as ``max_new_tokens``.
	        temperature: sampling temperature forwarded to the client; the
	            default keeps the previously hard-coded value of 0.7.

	    Returns:
	        Whatever ``client.text_generation`` returns (callers in this file
	        call ``.strip()`` on it, so a string is assumed), or the fixed
	        apology string below if the call raises.
	    """
	    try:
	        return client.text_generation(
	            prompt=prompt,
	            model=model,
	            max_new_tokens=max_tokens,
	            temperature=temperature,
	        )
	    except Exception as e:
	        # Deliberate best-effort boundary: surface the failure in the UI and
	        # hand back a placeholder so the calling code keeps running.
	        st.error(f"Error calling LLM: {e}")
	        return "Sorry, I encountered an error."

	# Set page config
	# NOTE(review): get_client() is invoked earlier in this script. Streamlit
	# releases that require set_page_config to be the first Streamlit command
	# may reject this ordering - confirm against the pinned Streamlit version.
	st.set_page_config(layout="wide")
	st.title("Duel Agent Simulation 🦙🦙")

	# Sidebar setup
	# Collapsible instruction manual followed by the topic input and the two
	# control buttons.
	# NOTE(review): indentation was lost in this copy of the file, so whether the
	# "User inputs" widgets below are nested under `with st.sidebar:` cannot be
	# determined from here - confirm against the deployed app.py.
	# NOTE(review): the manual text says "two AI agents" but lists three roles,
	# and it names the button "Run Simulation" while the widget label below is
	# "Run Simulation!".
	with st.sidebar:
	with st.expander("Instruction Manual"):
	st.markdown("""
	# 🦙🦙 Duel Agent Simulation
	## Overview
	This app simulates an interview with two AI agents:
	1. Interviewer: Asks questions about your topic
	2. Interviewee: Attempts to answer (poorly at first)
	3. Judge: Provides feedback after each answer

	## How to Use
	1. Enter a topic below
	2. Click "Run Simulation"
	3. Watch the conversation unfold
	4. See the evaluation results

	The simulation stops when the interviewee gives a good answer (8/10 or higher).
	""")

	# User inputs
	# user_topic is interpolated into the interviewer prompt; submit_button is
	# True only on the script run triggered by clicking it.
	user_topic = st.text_input("Enter a topic", "Artificial Intelligence")
	submit_button = st.button("Run Simulation!")

	# Wipe everything kept in session state (chat messages and simulation
	# results) and rerun the script.
	if st.button("Clear Session"):
	st.session_state.clear()
	st.rerun()

	# Seed the session-state stores if they are not there yet:
	#   messages        - list of {"role", "content"} dicts shown as the chat log
	#   simulation_data - parallel lists, one entry per completed round
	if "messages" not in st.session_state:
	    st.session_state["messages"] = []

	if "simulation_data" not in st.session_state:
	    st.session_state["simulation_data"] = {
	        column: []
	        for column in ("iterations", "questions", "answers", "feedback", "scores")
	    }

	# Re-render every message kept in session state, in stored order
	for entry in st.session_state.messages:
	    role, content = entry["role"], entry["content"]
	    with st.chat_message(role):
	        st.markdown(content)

	# Hard cap on interview rounds per run, and the judge score that ends a run early.
	_MAX_ROUNDS = 6
	_PASSING_SCORE = 8


	def _parse_score(judge_response, default=5):
	    """Return the first integer written after "Score:" in the judge's reply.

	    Accepts forms such as "Score: 8", "Score: 8/10", "Score: 8." and
	    "Score: **8**". Returns *default* when no numeric score is present.
	    """
	    import re  # function-scope import keeps this block self-contained

	    found = re.search(r"Score:\s*\**\s*(\d+)", judge_response)
	    return int(found.group(1)) if found else default


	def _post(role, text):
	    """Show *text* as a chat message for *role* and record it in the session log."""
	    with st.chat_message(role):
	        st.markdown(text)
	    st.session_state.messages.append({"role": role, "content": text})


	# Run simulation when button is pressed
	if submit_button:
	    iter_count = 0
	    sim_data = st.session_state.simulation_data

	    # Display initial topic
	    _post("user", f"Topic: {user_topic}")

	    with st.spinner("Running simulation..."):
	        while iter_count < _MAX_ROUNDS:
	            # Interviewer asks question
	            question = call_llama(
	                f"""You are a technical interviewer. Ask one specific question about {user_topic}.
	Make it challenging but answerable. Return only the question."""
	            ).strip()
	            _post("assistant", f"Interviewer: {question}")

	            # Interviewee answers
	            if iter_count < 2:  # First attempts are poor
	                answer_prompt = f"""You are a nervous interviewee. Give a mediocre answer to:
	"{question}". Return only the answer."""
	            else:  # Later attempts improve, guided by the most recent judge feedback
	                feedback = sim_data["feedback"][-1] if sim_data["feedback"] else ""
	                answer_prompt = f"""You're learning to answer better. Previous feedback was:
	"{feedback}". Now answer: "{question}". Return only the improved answer."""

	            answer = call_llama(answer_prompt).strip()
	            _post("user", f"Interviewee: {answer}")

	            # Judge evaluates
	            feedback_prompt = f"""Evaluate this interview exchange:
	Question: {question}
	Answer: {answer}
	Provide specific feedback and a score from 1-10 (10=best).
	Format: Feedback: [your feedback] Score: [1-10]"""

	            judge_response = call_llama(feedback_prompt).strip()

	            # Extract score; a reply without a readable score counts as 5,
	            # which is below the passing mark, so the run continues.
	            score = _parse_score(judge_response, default=5)

	            # Store data (parallel lists, one entry per round)
	            sim_data["iterations"].append(iter_count)
	            sim_data["questions"].append(question)
	            sim_data["answers"].append(answer)
	            sim_data["feedback"].append(judge_response)
	            sim_data["scores"].append(score)

	            # Show feedback
	            _post("assistant", f"Judge: {judge_response}")

	            # Display results table (everything accumulated in this session so far)
	            with st.expander("Detailed Results"):
	                results_df = pd.DataFrame({
	                    "Round": sim_data["iterations"],
	                    "Question": sim_data["questions"],
	                    "Answer": sim_data["answers"],
	                    "Score": sim_data["scores"],
	                    "Feedback": sim_data["feedback"],
	                })
	                st.dataframe(results_df, use_container_width=True)

	            # Check stopping condition
	            if score >= _PASSING_SCORE:
	                st.success("🎉 Simulation complete! The interviewee passed with a good answer.")
	                break

	            iter_count += 1

	    # Only reached without a break when every round was used up.
	    if iter_count == _MAX_ROUNDS:
	        st.warning("Simulation ended - maximum rounds reached")