fix: add recovery hints to loop guard warnings
This commit is contained in:
@ -336,10 +336,7 @@ class ToolCallGuardrailController:
|
|||||||
return ToolGuardrailDecision(
|
return ToolGuardrailDecision(
|
||||||
action="warn",
|
action="warn",
|
||||||
code="same_tool_failure_warning",
|
code="same_tool_failure_warning",
|
||||||
message=(
|
message=_tool_failure_recovery_hint(tool_name, same_count),
|
||||||
f"{tool_name} has failed {same_count} times this turn. "
|
|
||||||
"This looks like a loop; change approach before retrying."
|
|
||||||
),
|
|
||||||
tool_name=tool_name,
|
tool_name=tool_name,
|
||||||
count=same_count,
|
count=same_count,
|
||||||
signature=signature,
|
signature=signature,
|
||||||
@ -406,6 +403,26 @@ def append_toolguard_guidance(result: str, decision: ToolGuardrailDecision) -> s
|
|||||||
return (result or "") + suffix
|
return (result or "") + suffix
|
||||||
|
|
||||||
|
|
||||||
|
def _tool_failure_recovery_hint(tool_name: str, count: int) -> str:
|
||||||
|
"""Action-oriented guidance for recovering from repeated tool failures."""
|
||||||
|
common = (
|
||||||
|
f"{tool_name} has failed {count} times this turn. This looks like a loop. "
|
||||||
|
"Do not switch to text-only replies; keep using tools, but diagnose before retrying. "
|
||||||
|
"First inspect the latest error/output and verify your assumptions. "
|
||||||
|
)
|
||||||
|
if tool_name == "terminal":
|
||||||
|
return common + (
|
||||||
|
"For terminal failures, run a small diagnostic such as `pwd && ls -la` "
|
||||||
|
"in the same tool, then try an absolute path, a simpler command, a different "
|
||||||
|
"working directory, or a different tool such as read_file/write_file/patch."
|
||||||
|
)
|
||||||
|
return common + (
|
||||||
|
"Try different arguments, a narrower query/path, an absolute path when relevant, "
|
||||||
|
"or a different tool that can make progress. If the blocker is external, report "
|
||||||
|
"the blocker after one diagnostic attempt instead of repeating the same failing path."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _coerce_args(args: Mapping[str, Any] | None) -> Mapping[str, Any]:
|
def _coerce_args(args: Mapping[str, Any] | None) -> Mapping[str, Any]:
|
||||||
return args if isinstance(args, Mapping) else {}
|
return args if isinstance(args, Mapping) else {}
|
||||||
|
|
||||||
|
|||||||
@ -160,6 +160,10 @@ def test_same_tool_varying_args_warns_by_default_without_halting():
|
|||||||
assert first.action == "allow"
|
assert first.action == "allow"
|
||||||
assert [second.action, third.action, fourth.action] == ["warn", "warn", "warn"]
|
assert [second.action, third.action, fourth.action] == ["warn", "warn", "warn"]
|
||||||
assert {second.code, third.code, fourth.code} == {"same_tool_failure_warning"}
|
assert {second.code, third.code, fourth.code} == {"same_tool_failure_warning"}
|
||||||
|
assert "Do not switch to text-only replies" in second.message
|
||||||
|
assert "keep using tools" in second.message
|
||||||
|
assert "diagnose before retrying" in second.message
|
||||||
|
assert "different tool" in second.message
|
||||||
assert controller.halt_decision is None
|
assert controller.halt_decision is None
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -153,6 +153,37 @@ def test_sequential_after_call_appends_guidance_to_tool_result_without_extra_mes
|
|||||||
assert "repeated_exact_failure_warning" in messages[0]["content"]
|
assert "repeated_exact_failure_warning" in messages[0]["content"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_same_tool_failure_warning_tells_model_to_recover_with_tools():
|
||||||
|
agent = _make_agent("terminal")
|
||||||
|
guardrails = getattr(agent, "_tool_guardrails")
|
||||||
|
guardrails.after_call(
|
||||||
|
"terminal",
|
||||||
|
{"command": "bad-1"},
|
||||||
|
json.dumps({"exit_code": 1}),
|
||||||
|
failed=True,
|
||||||
|
)
|
||||||
|
guardrails.after_call(
|
||||||
|
"terminal",
|
||||||
|
{"command": "bad-2"},
|
||||||
|
json.dumps({"exit_code": 1}),
|
||||||
|
failed=True,
|
||||||
|
)
|
||||||
|
tc = _mock_tool_call("terminal", json.dumps({"command": "bad-3"}), "c-recover")
|
||||||
|
msg = SimpleNamespace(content="", tool_calls=[tc])
|
||||||
|
messages = []
|
||||||
|
|
||||||
|
with patch("run_agent.handle_function_call", return_value=json.dumps({"exit_code": 1})):
|
||||||
|
agent._execute_tool_calls_sequential(msg, messages, "task-1")
|
||||||
|
|
||||||
|
content = messages[0]["content"]
|
||||||
|
assert "same_tool_failure_warning" in content
|
||||||
|
assert "Do not switch to text-only replies" in content
|
||||||
|
assert "keep using tools" in content
|
||||||
|
assert "pwd && ls -la" in content
|
||||||
|
assert "absolute path" in content
|
||||||
|
assert "different tool" in content
|
||||||
|
|
||||||
|
|
||||||
def test_config_enabled_hard_stop_concurrent_path_does_not_submit_blocked_calls_and_preserves_result_order():
|
def test_config_enabled_hard_stop_concurrent_path_does_not_submit_blocked_calls_and_preserves_result_order():
|
||||||
agent = _make_agent("web_search", config=_hard_stop_config())
|
agent = _make_agent("web_search", config=_hard_stop_config())
|
||||||
blocked_args = {"query": "blocked"}
|
blocked_args = {"query": "blocked"}
|
||||||
|
|||||||
Reference in New Issue
Block a user