K-Sort-Arena / model /matchmaker.py
ksort's picture
fix bug
0e99a0b
raw
history blame
2.85 kB
import numpy as np
import json
from trueskill import TrueSkill
import paramiko
import io, os
import sys
sys.path.append('../')
from serve.constants import SSH_SERVER, SSH_PORT, SSH_USER, SSH_PASSWORD, SSH_SKILL
# Module-level TrueSkill environment built with the library's default
# parameters; the rating helpers below (update / deserialize) use it.
trueskill_env = TrueSkill()
def ucb_score(trueskill_diff, t, n):
    """Return an upper-confidence-bound style score for candidate opponents.

    The score is ``-trueskill_diff + sqrt(2 * ln(t) / n)``: a smaller skill
    gap yields a higher score, and the exploration bonus grows with ``t``
    and shrinks with ``n``.

    Args:
        trueskill_diff: Skill gap(s) to the selected player (scalar or
            NumPy array).
        t: Total number of comparisons made so far.
        n: Comparison count(s) used to scale the exploration bonus.

    Returns:
        The UCB score(s), broadcast to the shape of the inputs.
    """
    # The small epsilons guard against log(0) and division by zero.
    log_term = np.log(t + 1e-5)
    # ln(t) is negative for t < 1 (e.g. before any comparison has been
    # recorded); feeding that to sqrt would turn every score into NaN.
    # Clamping at zero leaves results for t >= 1 untouched.
    log_term = np.maximum(log_term, 0.0)
    exploration_term = np.sqrt((2 * log_term) / (n + 1e-5))
    return -trueskill_diff + 1.0 * exploration_term
def update_trueskill(ratings, ranks):
    """Re-rate a match using the module-level TrueSkill environment.

    Args:
        ratings: Rating groups, passed straight to ``trueskill_env.rate``.
        ranks: Rank of each group, passed straight to ``trueskill_env.rate``.

    Returns:
        Whatever ``trueskill_env.rate`` returns for these inputs.
    """
    return trueskill_env.rate(ratings, ranks)
def serialize_rating(rating):
    """Convert a rating into a plain dict holding its ``mu`` and ``sigma``.

    Args:
        rating: Any object exposing ``mu`` and ``sigma`` attributes.

    Returns:
        ``{'mu': ..., 'sigma': ...}`` with the values read from ``rating``.
    """
    mean = rating.mu
    deviation = rating.sigma
    return dict(mu=mean, sigma=deviation)
def deserialize_rating(rating_dict):
    """Rebuild a rating from a dict with ``'mu'`` and ``'sigma'`` keys.

    Args:
        rating_dict: Mapping containing the keys ``'mu'`` and ``'sigma'``.

    Returns:
        The object produced by ``trueskill_env.Rating`` for those values.

    Raises:
        KeyError: If either key is missing from ``rating_dict``.
    """
    mean = rating_dict['mu']
    deviation = rating_dict['sigma']
    return trueskill_env.Rating(mu=mean, sigma=deviation)
def create_ssh_client(server, port, user, password):
    """Open an SSH connection and return the connected client.

    Args:
        server: Host name or address to connect to.
        port: TCP port of the SSH service.
        user: Login name.
        password: Login password.

    Returns:
        A connected ``paramiko.SSHClient``; the caller is responsible for
        closing it.
    """
    client = paramiko.SSHClient()
    client.load_system_host_keys()
    # NOTE(review): AutoAddPolicy accepts host keys that are not already
    # known, so the server's identity is not verified on first contact.
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    client.connect(hostname=server, port=port, username=user, password=password)
    return client
def save_json_via_sftp(ratings, comparison_counts, total_comparisons):
    """Serialize the skill state to JSON and write it to ``SSH_SKILL`` over SFTP.

    Args:
        ratings: Iterable of rating objects exposing ``mu`` and ``sigma``.
        comparison_counts: Array-like with a ``tolist()`` method (e.g. a
            NumPy array) holding comparison counts.
        total_comparisons: Total number of comparisons; must be
            JSON-serializable.
    """
    # Serialize before connecting so bad input never opens a connection.
    json_data = json.dumps({
        'ratings': [serialize_rating(r) for r in ratings],
        'comparison_counts': comparison_counts.tolist(),
        'total_comparisons': total_comparisons,
    })

    ssh = create_ssh_client(SSH_SERVER, SSH_PORT, SSH_USER, SSH_PASSWORD)
    try:
        sftp = ssh.open_sftp()
        try:
            with sftp.open(SSH_SKILL, 'w') as f:
                f.write(json_data)
        finally:
            sftp.close()
    finally:
        # Always release the connection; the original leaked one per call.
        ssh.close()
def load_json_via_sftp():
    """Read the skill state stored at ``SSH_SKILL`` over SFTP.

    Returns:
        A tuple ``(ratings, comparison_counts, total_comparisons)`` where
        ``ratings`` is a list built with ``deserialize_rating``,
        ``comparison_counts`` is a NumPy array and ``total_comparisons`` is
        the value stored under that key in the JSON document.
    """
    ssh = create_ssh_client(SSH_SERVER, SSH_PORT, SSH_USER, SSH_PASSWORD)
    try:
        sftp = ssh.open_sftp()
        try:
            with sftp.open(SSH_SKILL, 'r') as f:
                data = json.load(f)
        finally:
            sftp.close()
    finally:
        # Always release the connection; the original leaked one per call.
        ssh.close()

    ratings = [deserialize_rating(r) for r in data['ratings']]
    comparison_counts = np.array(data['comparison_counts'])
    total_comparisons = data['total_comparisons']
    return ratings, comparison_counts, total_comparisons
def matchmaker(num_players, k_group=4):
    """Pick a random player and group it with its best-scoring opponents.

    The stored ratings are loaded over SFTP, one player index is drawn
    uniformly from ``[0, num_players)``, and every stored rating is scored
    against it with ``ucb_score``. The ``k_group - 1`` highest-scoring other
    players become its opponents. The resulting group is also stored on
    ``Model_ID.group``.

    Args:
        num_players: Exclusive upper bound for the randomly selected index.
        k_group: Desired group size, including the selected player.

    Returns:
        A list of player indices: the selected player first, followed by the
        opponents in ascending score order. It is shorter than ``k_group``
        when fewer than ``k_group`` ratings are stored.

    Raises:
        ValueError: If ``k_group`` is smaller than 1.
    """
    if k_group < 1:
        raise ValueError(f"k_group must be at least 1, got {k_group}")

    trueskill_env = TrueSkill()
    ratings, comparison_counts, total_comparisons = load_json_via_sftp()

    # Randomly select a player
    selected_player = np.random.randint(0, num_players)
    selected_trueskill_score = trueskill_env.expose(ratings[selected_player])
    trueskill_scores = np.array([trueskill_env.expose(p) for p in ratings])

    trueskill_diff = np.abs(trueskill_scores - selected_trueskill_score)
    n = comparison_counts[selected_player]
    ucb_scores = ucb_score(trueskill_diff, total_comparisons, n)

    # Exclude self, select opponents with the highest UCB scores.
    ucb_scores[selected_player] = -float('inf')
    ranked = np.argsort(ucb_scores)
    # Drop the selected player explicitly so it cannot be its own opponent
    # when k_group - 1 is at least the number of stored ratings.
    ranked = ranked[ranked != selected_player]

    # A plain ``[-k_group + 1:]`` slice degenerates to ``[0:]`` (everyone)
    # when k_group == 1, so the zero-opponent case is handled separately.
    num_opponents = k_group - 1
    opponents = ranked[-num_opponents:].tolist() if num_opponents > 0 else []

    # Group players
    model_ids = [selected_player] + opponents

    # Imported at call time rather than at module import, as in the original.
    from serve.update_skill import Model_ID
    Model_ID.group = model_ids

    return model_ids