"""Streamlit input widgets and evaluation-case setup for the surf-spot agent app."""

from datetime import datetime, timedelta
import json
import requests
import streamlit as st
from any_agent import AgentFramework
from any_agent.tracing.trace import _is_tracing_supported
from any_agent.evaluation import EvaluationCase

from constants import MODEL_OPTIONS


def create_evaluation_case() -> EvaluationCase:
    """Build the fixed EvaluationCase used to grade an agent run.

    The case is hard-coded and takes no configuration: it uses
    ``openai/gpt-4.1-mini`` as the LLM judge and awards one point for each
    checkpoint criterion listed below.

    Returns:
        EvaluationCase: The evaluation case with every checkpoint attached.
    """
    # One entry per checkpoint; each is worth a single point.
    criteria = (
        "Check if the agent used the get_surfing_spots tool and it succeeded, and that the tool was used before the get_wave_forecast and get_wind_forecast tools",
        "Check if the agent used the get_wave_forecast tool and it succeeded",
        "Check if the agent used the get_wind_forecast tool and it succeeded",
        "Check if the agent used the get_area_lat_lon tool and it succeeded",
        "Check if the agent used the driving_hours_to_meters tool to convert the driving hours to meters and it succeeded",
        "Check if the final answer contains any description about the weather at the chosen location",
        "Check if the final answer contains one of the surf spots found by a call of the get_surfing_spots tool",
        "Check that the agent completed in fewer than 10 steps",
    )
    checkpoints = [{"criteria": text, "points": 1} for text in criteria]

    return EvaluationCase(
        llm_judge="openai/gpt-4.1-mini",
        checkpoints=checkpoints,
    )


@st.cache_resource
def get_area(area_name: str) -> list[dict]:
    """Look up an area by name with the Nominatim search API.

    Uses the [Nominatim API](https://nominatim.org/release-docs/develop/api/Search/).
    The call is wrapped in ``st.cache_resource``, so Streamlit reuses the
    result for a name it has already looked up.

    Args:
        area_name (str): The free-form name of the area to search for.

    Returns:
        list[dict]: The decoded JSON search response. Nominatim's JSON search
            format is a list of place records; callers treat an empty result
            as "location not found".

    Raises:
        requests.HTTPError: If Nominatim answers with an error status code.
        requests.RequestException: On connection problems or when the 5 second
            timeout expires.
    """
    response = requests.get(
        "https://nominatim.openstreetmap.org/search",
        # Let requests URL-encode the query: the name is typed by the user and
        # may contain characters such as "&", "#" or "+" that would otherwise
        # corrupt the query string.
        params={"q": area_name, "format": "json"},
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=5,
    )
    response.raise_for_status()
    return json.loads(response.content.decode())


def get_user_inputs() -> dict:
    """Render the input widgets and collect the user's choices.

    Draws the location, driving-time, date/time, framework and model widgets,
    shows the evaluation case in an expander, and validates the location
    against Nominatim via ``get_area``.

    Returns:
        dict: The collected inputs with the keys ``location`` (str),
            ``max_driving_hours`` (number), ``date`` (datetime combining the
            selected date and time), ``framework``, ``model_id`` and
            ``evaluation_case`` (the EvaluationCase, or None when the
            "Run Evaluation" checkbox is unticked).
    """
    default_val = "Los Angeles California, US"

    col1, col2 = st.columns([3, 1])
    with col1:
        location = st.text_input("Enter a location", value=default_val)
    with col2:
        if location:
            # An empty Nominatim result means the location could not be resolved.
            if get_area(location):
                st.success("✅")
            else:
                st.error("❌")

    max_driving_hours = st.number_input(
        "Enter the maximum driving hours", min_value=1, value=2
    )

    col_date, col_time = st.columns([2, 1])
    with col_date:
        selected_date = st.date_input(
            "Select a date in the future", value=datetime.now() + timedelta(days=1)
        )
    with col_time:
        # Default to exactly 09:00:00. Starting from midnight (rather than the
        # current time) keeps seconds and microseconds at zero.
        selected_time = st.time_input(
            "Select a time", value=datetime.min.time().replace(hour=9)
        )
    date = datetime.combine(selected_date, selected_time)

    supported_frameworks = [
        framework for framework in AgentFramework if _is_tracing_supported(framework)
    ]
    # Prefer the third supported framework as the default, but clamp the index
    # so a shorter list does not make st.selectbox raise on an invalid index.
    default_framework_index = min(2, max(len(supported_frameworks) - 1, 0))

    framework = st.selectbox(
        "Select the agent framework to use",
        supported_frameworks,
        index=default_framework_index,
        format_func=lambda x: x.name,
    )

    model_id = st.selectbox(
        "Select the model to use",
        MODEL_OPTIONS,
        index=0,
        # Show at most the last three path segments of the model identifier.
        format_func=lambda x: "/".join(x.split("/")[-3:]),
    )

    # Add evaluation case section
    with st.expander("Evaluation Case"):
        evaluation_case = create_evaluation_case()
        # st.json takes ``expanded`` directly; st.write's extra keyword
        # arguments are deprecated.
        st.json(evaluation_case.model_dump(), expanded=True)

    # Rendered after the expander, same position as before; only attach the
    # evaluation case to the result when the user wants it run.
    run_evaluation = st.checkbox("Run Evaluation", value=True)

    return {
        "location": location,
        "max_driving_hours": max_driving_hours,
        "date": date,
        "framework": framework,
        "model_id": model_id,
        "evaluation_case": evaluation_case if run_evaluation else None,
    }