File size: 3,313 Bytes
aebdef3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02aebba
6d50a75
aebdef3
 
 
 
295a884
aebdef3
02aebba
aebdef3
249284d
 
02aebba
295a884
aebdef3
 
249284d
295a884
aebdef3
02aebba
aebdef3
249284d
 
02aebba
295a884
3404ee0
 
 
 
 
 
 
 
 
 
6d50a75
3404ee0
 
 
 
 
 
 
 
 
 
6d50a75
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import sys, os, types
from unittest.mock import patch, MagicMock

# Make the project root importable so `tournament_utils` resolves regardless
# of the directory pytest is invoked from.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# Stub out the third-party `litellm` dependency BEFORE importing the module
# under test, so the import succeeds without litellm installed.
# setdefault keeps a real litellm if one is already loaded.
fake_litellm = types.ModuleType('litellm')
fake_litellm.completion = MagicMock()
sys.modules.setdefault('litellm', fake_litellm)

import tournament_utils as tu


def make_response(contents):
    """Build a fake completion response whose ``choices[i].message.content``
    is ``contents[i]``, mimicking the litellm response shape the code reads."""
    choices = [
        types.SimpleNamespace(message=types.SimpleNamespace(content=text))
        for text in contents
    ]
    return MagicMock(choices=choices)


def test_generate_players():
    """generate_players forwards every option to completion and strips
    whitespace from each returned choice."""
    fake = make_response([" player1 ", "player2\n"])
    with patch('tournament_utils.completion', return_value=fake) as comp:
        players = tu.generate_players('instr', 2, model='m', api_base='b', api_key='k', temperature=0.5)
    # Mock retains its call record after the patch context exits.
    comp.assert_called_once_with(model='m', messages=[{'role': 'user', 'content': 'instr'}], n=2, api_base='b', api_key='k', temperature=0.5, chat_template_kwargs={'enable_thinking': False})
    assert players == ['player1', 'player2']


def test_prompt_score():
    """prompt_score returns the raw verdict text and passes the connection
    kwargs (api_base, api_key, temperature) straight through to completion."""
    fake = make_response(["Final verdict: [5]"])
    with patch('tournament_utils.completion', return_value=fake) as comp:
        verdict = tu.prompt_score('instr', ['c1'], 'block', 'pl', model='m', api_base='b', api_key='k', temperature=0.2, include_instruction=False)
    comp.assert_called_once()
    forwarded = comp.call_args.kwargs
    for key, expected in (('api_base', 'b'), ('api_key', 'k'), ('temperature', 0.2)):
        assert forwarded[key] == expected
    assert verdict == 'Final verdict: [5]'


def test_prompt_pairwise():
    """prompt_pairwise returns the raw verdict text and forwards the
    connection kwargs to completion, mirroring test_prompt_score."""
    fake = make_response(["Final verdict: A"])
    with patch('tournament_utils.completion', return_value=fake) as comp:
        verdict = tu.prompt_pairwise('instr', 'block', 'A text', 'B text', model='m', api_base='b', api_key='k', temperature=0.3, include_instruction=False)
    comp.assert_called_once()
    forwarded = comp.call_args.kwargs
    for key, expected in (('api_base', 'b'), ('api_key', 'k'), ('temperature', 0.3)):
        assert forwarded[key] == expected
    assert verdict == 'Final verdict: A'


def test_thinking_passed_to_completion():
    """thinking=True must surface as enable_thinking=True on every one of the
    three entry points that hit completion."""
    fake = make_response(["ok"])
    with patch('tournament_utils.completion', return_value=fake) as comp:
        tu.generate_players('i', 1, thinking=True)
        tu.prompt_score('i', ['c'], 'block', 'p', thinking=True)
        tu.prompt_pairwise('i', 'block', 'a', 'b', thinking=True)
    assert comp.call_count == 3
    assert all(
        call.kwargs['chat_template_kwargs'] == {'enable_thinking': True}
        for call in comp.call_args_list
    )


def test_thinking_disabled_by_default():
    """When thinking is omitted, all three entry points must send
    enable_thinking=False to completion."""
    fake = make_response(["ok"])
    with patch('tournament_utils.completion', return_value=fake) as comp:
        tu.generate_players('i', 1)
        tu.prompt_score('i', ['c'], 'block', 'p')
        tu.prompt_pairwise('i', 'block', 'a', 'b')
    assert comp.call_count == 3
    assert all(
        call.kwargs['chat_template_kwargs'] == {'enable_thinking': False}
        for call in comp.call_args_list
    )