Files
hermes-agent/tests/hermes_cli/test_kanban_goal_mode.py
Teknium 0cd7d54b00 feat(kanban): goal_mode cards run workers in a /goal loop (#35710)
* feat(kanban): goal_mode cards run workers in a /goal loop

A goal_mode card wraps its dispatched worker in the Ralph-style goal
loop behind /goal: after each turn an auxiliary judge checks the
worker's response against the card title+body, and if not done the
worker keeps going in the SAME session until the judge agrees, the
worker terminates the task itself, or the turn budget runs out (which
blocks the card for human review — never a silent exit).

- kanban_db: goal_mode + goal_max_turns columns (additive migration),
  Task fields, create_task params, INSERT wiring, created-event payload.
- kanban_tools: goal_mode/goal_max_turns on the kanban_create tool so
  orchestrators can opt cards in when fanning out.
- kanban CLI: --goal / --goal-max-turns on 'kanban create'.
- dashboard API: goal_mode/goal_max_turns on the create endpoint
  (auto-surfaced back via asdict).
- _default_spawn: sets HERMES_KANBAN_GOAL_MODE / _GOAL_MAX_TURNS only
  when the card opts in.
- goals.run_kanban_goal_loop: standalone, callback-injected loop engine
  (no SessionDB persistence; ephemeral worker). cli.py quiet path calls
  it after the worker's first turn when the env vars are set.
- Docs: orchestrator skill + kanban feature page.

Tests: DB roundtrip + legacy migration, spawn env gating, and the loop's
continuation/completion/budget-block/finalize-nudge branches. E2E run
against a real kanban DB confirms a budget-exhausted goal worker lands
in a sticky blocked state.

* feat(kanban/dashboard): goal-mode toggle in the create form

Wires the goal_mode card setting into the dashboard UI (the plugin's
hand-written IIFE bundle, no build step):

- InlineCreate: 'goal mode' checkbox after the skills field; checking it
  reveals an optional 'max turns' number input. Both reset on submit and
  only post goal_mode/goal_max_turns when enabled.
- TaskDrawer: a 'Goal mode: on (max N turns)' MetaRow so a card's
  goal-mode setting is visible after creation (auto-fed by asdict via the
  existing _task_dict).

Live-tested through the running dashboard with a browser: created a
goal-mode card with max-turns=8, confirmed it persisted to the kanban DB
(goal_mode=1, goal_max_turns=8) and rendered back in the drawer as
'on (max 8 turns)'. No JS console errors.
2026-05-31 01:16:33 -07:00

301 lines
9.4 KiB
Python

"""Tests for kanban goal_mode — per-card Ralph-style goal loop.
Covers three layers:

1. DB: goal_mode / goal_max_turns persist through create_task + from_row,
   and a legacy DB (without the columns) migrates cleanly.
2. Spawn: _default_spawn sets the HERMES_KANBAN_GOAL_MODE env vars only
   when the card opts in.
3. Loop: goals.run_kanban_goal_loop continuation / completion / budget
   behaviour, driven entirely through injected callbacks (no live model).
"""
from __future__ import annotations
import sqlite3
from pathlib import Path
import pytest
from hermes_cli import kanban_db as kb
from hermes_cli import goals
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide an isolated Hermes home with an initialised kanban DB.

    Creates ``<tmp_path>/.hermes``, redirects ``Path.home()`` to
    ``tmp_path``, points ``HERMES_HOME`` at the new directory and then
    calls ``kb.init_db()``.

    Returns:
        The ``.hermes`` directory path.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()

    # Both redirections are applied before the DB is initialised.
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    kb.init_db()
    return hermes_dir
# ---------------------------------------------------------------------------
# DB layer
# ---------------------------------------------------------------------------
def test_goal_mode_defaults_off(kanban_home):
    """A task created without goal options reads back with goal mode off."""
    with kb.connect() as db:
        task_id = kb.create_task(db, title="plain task", assignee="worker")
        stored = kb.get_task(db, task_id)

    # Identity checks on purpose: the fields must be exactly False / None.
    assert stored.goal_mode is False
    assert stored.goal_max_turns is None
def test_goal_mode_persists(kanban_home):
    """goal_mode=True and goal_max_turns=7 survive a create/get round trip."""
    goal_options = {"goal_mode": True, "goal_max_turns": 7}

    with kb.connect() as db:
        task_id = kb.create_task(
            db,
            title="open-ended task",
            assignee="worker",
            **goal_options,
        )
        stored = kb.get_task(db, task_id)

    assert stored.goal_mode is True
    assert stored.goal_max_turns == 7
def test_goal_mode_without_max_turns(kanban_home):
    """Opting in to goal mode without a turn cap leaves goal_max_turns None."""
    with kb.connect() as db:
        task_id = kb.create_task(
            db,
            title="t",
            assignee="worker",
            goal_mode=True,
        )
        stored = kb.get_task(db, task_id)

    assert stored.goal_mode is True
    assert stored.goal_max_turns is None
def test_legacy_db_migrates_goal_columns(tmp_path, monkeypatch):
    """A tasks table created without goal columns must gain them on init.

    Hand-builds a ``tasks`` table that lacks ``goal_mode`` and
    ``goal_max_turns``, inserts one row, then runs ``kb.init_db()`` and
    checks that both columns now exist and that the pre-existing row reads
    back with goal mode off.
    """
    home = tmp_path / ".hermes"
    home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    db_path = kb.kanban_db_path()
    db_path.parent.mkdir(parents=True, exist_ok=True)

    # Minimal legacy schema: tasks table missing goal_mode / goal_max_turns.
    legacy = sqlite3.connect(db_path)
    try:
        legacy.execute(
            """
            CREATE TABLE tasks (
                id TEXT PRIMARY KEY,
                title TEXT NOT NULL,
                body TEXT,
                assignee TEXT,
                status TEXT NOT NULL DEFAULT 'ready',
                priority INTEGER NOT NULL DEFAULT 0,
                created_by TEXT,
                created_at INTEGER NOT NULL,
                started_at INTEGER,
                completed_at INTEGER,
                workspace_kind TEXT NOT NULL DEFAULT 'scratch',
                workspace_path TEXT,
                claim_lock TEXT,
                claim_expires INTEGER
            )
            """
        )
        legacy.execute(
            "INSERT INTO tasks (id, title, status, priority, created_at, workspace_kind) "
            "VALUES ('legacy1', 'old', 'ready', 0, 1, 'scratch')"
        )
        legacy.commit()
    finally:
        # Close even when setup raises so a failing test does not leave the
        # raw connection (and the DB file handle) open.
        legacy.close()

    # init_db runs the additive migration.
    kb.init_db()

    with kb.connect() as conn:
        cols = {r["name"] for r in conn.execute("PRAGMA table_info(tasks)")}
        task = kb.get_task(conn, "legacy1")

    assert "goal_mode" in cols
    assert "goal_max_turns" in cols
    # Existing row keeps the safe default.
    assert task.goal_mode is False
    assert task.goal_max_turns is None
# ---------------------------------------------------------------------------
# Spawn env
# ---------------------------------------------------------------------------
def test_spawn_sets_goal_env_only_when_enabled(kanban_home, monkeypatch):
    """Spawning a goal-mode task passes both goal env vars to the subprocess."""
    seen = {}

    class _StubProcess:
        pid = 4242

    def _record_popen(cmd, **popen_kwargs):
        # Remember the environment the spawner would hand to the child.
        seen["env"] = popen_kwargs.get("env", {})
        return _StubProcess()

    monkeypatch.setattr("subprocess.Popen", _record_popen)
    # Avoid the kanban-worker skill probe touching the real skills dir.
    monkeypatch.setattr(kb, "_kanban_worker_skill_available", lambda home: False)

    with kb.connect() as db:
        task_id = kb.create_task(
            db,
            title="goal task",
            assignee="default",
            goal_mode=True,
            goal_max_turns=5,
        )
        goal_task = kb.get_task(db, task_id)

    kb._default_spawn(goal_task, str(kanban_home))

    child_env = seen["env"]
    assert child_env.get("HERMES_KANBAN_GOAL_MODE") == "1"
    assert child_env.get("HERMES_KANBAN_GOAL_MAX_TURNS") == "5"
def test_spawn_no_goal_env_for_plain_task(kanban_home, monkeypatch):
    """Spawning a task that did not opt in sets neither goal env var."""
    seen = {}

    class _StubProcess:
        pid = 4243

    def _record_popen(cmd, **popen_kwargs):
        # Remember the environment the spawner would hand to the child.
        seen["env"] = popen_kwargs.get("env", {})
        return _StubProcess()

    monkeypatch.setattr("subprocess.Popen", _record_popen)
    monkeypatch.setattr(kb, "_kanban_worker_skill_available", lambda home: False)

    with kb.connect() as db:
        task_id = kb.create_task(db, title="plain", assignee="default")
        plain_task = kb.get_task(db, task_id)

    kb._default_spawn(plain_task, str(kanban_home))

    child_env = seen["env"]
    assert "HERMES_KANBAN_GOAL_MODE" not in child_env
    assert "HERMES_KANBAN_GOAL_MAX_TURNS" not in child_env
# ---------------------------------------------------------------------------
# Goal loop logic (callback-injected, no live model)
# ---------------------------------------------------------------------------
def _patch_judge(monkeypatch, verdicts):
    """Replace ``goals.judge_goal`` with a stub replaying *verdicts* in order.

    Each call consumes the next scripted verdict; once the script is used
    up the stub answers ``"done"``. The stub returns the tuple
    ``(verdict, "scripted:<verdict>", False)``.
    """
    # Snapshot the script so later changes to the caller's list are ignored.
    script = iter(list(verdicts))

    def _scripted_judge(goal, response, subgoals=None):
        verdict = next(script, "done")
        return verdict, f"scripted:{verdict}", False

    monkeypatch.setattr(goals, "judge_goal", _scripted_judge)
def test_loop_stops_when_worker_already_completed(monkeypatch):
    """A task already reporting "done" ends the loop with no extra turns."""
    # Worker called kanban_complete on its first turn — no judging needed.
    _patch_judge(monkeypatch, ["continue"])  # should never be consulted
    extra_turns = []

    def _run_turn(prompt):
        extra_turns.append(prompt)
        return "x"

    def _unexpected_block(reason):
        pytest.fail("should not block")

    def _status():
        return "done"

    result = goals.run_kanban_goal_loop(
        task_id="t1",
        goal_text="do the thing",
        run_turn=_run_turn,
        task_status_fn=_status,
        block_fn=_unexpected_block,
        first_response="done already",
    )

    assert result["outcome"] == "completed_by_worker"
    assert extra_turns == []  # no extra turns
def test_loop_continues_then_worker_completes(monkeypatch):
    """Two "continue" verdicts feed two prompts before the task reads "done"."""
    _patch_judge(monkeypatch, ["continue", "continue"])
    status_script = iter(["running", "running", "done"])
    prompts = []

    def _run_turn(prompt):
        prompts.append(prompt)
        # Numbered after the append, so the first reply is "turn1".
        return f"turn{len(prompts)}"

    def _status():
        return next(status_script)

    def _unexpected_block(reason):
        pytest.fail("should not block")

    result = goals.run_kanban_goal_loop(
        task_id="t2",
        goal_text="ship feature",
        run_turn=_run_turn,
        task_status_fn=_status,
        block_fn=_unexpected_block,
        max_turns=10,
        first_response="started",
    )

    assert result["outcome"] == "completed_by_worker"
    # Two continuation turns fed before the worker completed.
    assert len(prompts) == 2
    assert all("not done yet" in prompt for prompt in prompts)
def test_loop_blocks_on_budget_exhaustion(monkeypatch):
    """With max_turns=3 and a judge that never agrees, the card is blocked."""
    _patch_judge(monkeypatch, ["continue"] * 10)
    block_record = {}

    loop_kwargs = {
        "task_id": "t3",
        "goal_text": "endless task",
        "run_turn": lambda p: "still going",
        "task_status_fn": lambda: "running",
        # Capture the reason text handed to the block callback.
        "block_fn": lambda reason: block_record.update(reason=reason),
        "max_turns": 3,
        "first_response": "turn1",
    }
    result = goals.run_kanban_goal_loop(**loop_kwargs)

    assert result["outcome"] == "blocked_budget"
    assert result["turns_used"] == 3
    assert "turn budget" in block_record["reason"].lower()
def test_loop_finalize_nudge_when_judge_done_but_open(monkeypatch):
    """A "done" verdict on a still-running task yields one "still open" prompt."""
    # Judge says done, but worker never terminated → one finalize nudge,
    # then worker completes.
    _patch_judge(monkeypatch, ["done", "done"])
    status_script = iter(["running", "done"])
    prompts = []

    def _run_turn(prompt):
        prompts.append(prompt)
        return "ok"

    def _status():
        return next(status_script)

    def _unexpected_block(reason):
        pytest.fail("should not block")

    result = goals.run_kanban_goal_loop(
        task_id="t4",
        goal_text="task",
        run_turn=_run_turn,
        task_status_fn=_status,
        block_fn=_unexpected_block,
        max_turns=10,
        first_response="looks done",
    )

    assert result["outcome"] == "completed_by_worker"
    assert len(prompts) == 1
    assert "still open" in prompts[0]
def test_loop_blocks_when_judge_done_but_never_finalizes(monkeypatch):
    """A task that stays "running" despite "done" verdicts ends up blocked."""
    # Judge keeps saying done, worker never calls kanban_complete → block
    # after the single finalize nudge.
    _patch_judge(monkeypatch, ["done", "done"])
    block_record = {}

    def _record_block(reason):
        block_record["reason"] = reason

    def _run_turn(prompt):
        return "still not finalizing"

    def _status():
        return "running"

    result = goals.run_kanban_goal_loop(
        task_id="t5",
        goal_text="task",
        run_turn=_run_turn,
        task_status_fn=_status,
        block_fn=_record_block,
        max_turns=10,
        first_response="looks done",
    )

    assert result["outcome"] == "blocked_budget"
    assert "finalize" in block_record["reason"].lower()
def test_loop_stops_if_task_reclaimed(monkeypatch):
    """An "archived" task status stops the loop without a turn or a block."""
    _patch_judge(monkeypatch, ["continue"])

    def _unexpected_turn(prompt):
        pytest.fail("should not run a turn")

    def _unexpected_block(reason):
        pytest.fail("should not block")

    def _status():
        return "archived"

    result = goals.run_kanban_goal_loop(
        task_id="t6",
        goal_text="task",
        run_turn=_unexpected_turn,
        task_status_fn=_status,
        block_fn=_unexpected_block,
        first_response="x",
    )

    assert result["outcome"] == "stopped"