surf-spot-finder / constants.py
njbrake's picture
Update constants.py
6c3142a verified
from any_agent.evaluation import EvaluationCase
from surf_spot_finder.tools import (
get_area_lat_lon,
get_wave_forecast,
get_wind_forecast,
)
from any_agent.tools.web_browsing import search_web, visit_webpage
MODEL_OPTIONS = [
# "huggingface/novita/deepseek-ai/DeepSeek-V3",
# "huggingface/novita/meta-llama/Llama-3.3-70B-Instruct",
"openai/gpt-4.1-nano",
"openai/gpt-4.1-mini",
"openai/gpt-4o",
"gemini/gemini-2.0-flash-lite",
"gemini/gemini-2.0-flash",
# "huggingface/Qwen/Qwen3-32B", # right now throwing an internal error, but novita qwen isn't supporting tool calling
]
# Novita was the only HF based provider that worked.
# Hugginface API Provider Error:
# Must alternate between assistant/user, which meant that the 'tool' role made it puke
DEFAULT_EVALUATION_CASE = EvaluationCase(
llm_judge=MODEL_OPTIONS[0],
checkpoints=[
{
"criteria": "Check if the agent considered at least three surf spot options",
"points": 1,
},
{
"criteria": "Check if the agent gathered wind forecasts for each surf spot being evaluated.",
"points": 1,
},
{
"criteria": "Check if the agent gathered wave forecasts for each surf spot being evaluated.",
"points": 1,
},
{
"criteria": "Check if the agent used any web search tools to explore which surf spots should be considered",
"points": 1,
},
{
"criteria": "Check if the final answer contains any description about the weather (air temp, chance of rain, etc) at the chosen location",
"points": 1,
},
{
"criteria": "Check if the final answer includes one of the surf spots evaluated by tools",
"points": 1,
},
{
"criteria": "Check if the final answer includes information about some alternative surf spots if the user is not satisfied with the chosen one",
"points": 1,
},
],
)
DEFAULT_TOOLS = [
get_wind_forecast,
get_wave_forecast,
get_area_lat_lon,
search_web,
visit_webpage,
]