* feat(kanban): goal_mode cards run workers in a /goal loop

  A goal_mode card wraps its dispatched worker in the Ralph-style goal loop
  behind /goal: after each turn an auxiliary judge checks the worker's
  response against the card title+body, and if not done the worker keeps
  going in the SAME session until the judge agrees, the worker terminates
  the task itself, or the turn budget runs out (which blocks the card for
  human review — never a silent exit).

  - kanban_db: goal_mode + goal_max_turns columns (additive migration),
    Task fields, create_task params, INSERT wiring, created-event payload.
  - kanban_tools: goal_mode/goal_max_turns on the kanban_create tool so
    orchestrators can opt cards in when fanning out.
  - kanban CLI: --goal / --goal-max-turns on 'kanban create'.
  - dashboard API: goal_mode/goal_max_turns on the create endpoint
    (auto-surfaced back via asdict).
  - _default_spawn: sets HERMES_KANBAN_GOAL_MODE / _GOAL_MAX_TURNS only
    when the card opts in.
  - goals.run_kanban_goal_loop: standalone, callback-injected loop engine
    (no SessionDB persistence; ephemeral worker). cli.py quiet path calls
    it after the worker's first turn when the env vars are set.
  - Docs: orchestrator skill + kanban feature page.

  Tests: DB roundtrip + legacy migration, spawn env gating, and the loop's
  continuation/completion/budget-block/finalize-nudge branches. E2E run
  against a real kanban DB confirms a budget-exhausted goal worker lands in
  a sticky blocked state.

* feat(kanban/dashboard): goal-mode toggle in the create form

  Wires the goal_mode card setting into the dashboard UI (the plugin's
  hand-written IIFE bundle, no build step):

  - InlineCreate: 'goal mode' checkbox after the skills field; checking it
    reveals an optional 'max turns' number input. Both reset on submit and
    only post goal_mode/goal_max_turns when enabled.
  - TaskDrawer: a 'Goal mode: on (max N turns)' MetaRow so a card's
    goal-mode setting is visible after creation (auto-fed by asdict via the
    existing _task_dict).

  Live-tested through the running dashboard with a browser: created a
  goal-mode card with max-turns=8, confirmed it persisted to the kanban DB
  (goal_mode=1, goal_max_turns=8) and rendered back in the drawer as
  'on (max 8 turns)'. No JS console errors.
301 lines
9.4 KiB
Python
301 lines
9.4 KiB
Python
"""Tests for kanban goal_mode — per-card Ralph-style goal loop.

Covers three layers:

1. DB: goal_mode / goal_max_turns persist through create_task + from_row,
   and a legacy DB (without the columns) migrates cleanly.
2. Spawn: _default_spawn sets the HERMES_KANBAN_GOAL_MODE env vars only
   when the card opts in.
3. Loop: goals.run_kanban_goal_loop continuation / completion / budget
   behaviour, driven entirely through injected callbacks (no live model).
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import sqlite3
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from hermes_cli import kanban_db as kb
|
|
from hermes_cli import goals
|
|
|
|
|
|
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide an initialised kanban home rooted inside ``tmp_path``.

    Creates ``<tmp_path>/.hermes``, points the ``HERMES_HOME`` environment
    variable at it, patches ``Path.home`` to return ``tmp_path``, runs
    ``kb.init_db()``, and returns the ``.hermes`` directory.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()

    # Redirect both lookup routes (Path.home() and the env var) into tmp_path
    # before the database is initialised.
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    kb.init_db()
    return hermes_dir
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# DB layer
# ---------------------------------------------------------------------------
|
|
|
|
def test_goal_mode_defaults_off(kanban_home):
    """A task created without goal arguments reads back with goal mode off.

    Expects ``goal_mode`` to be exactly ``False`` and ``goal_max_turns`` to
    be ``None`` on the task returned by ``kb.get_task``.
    """
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="plain task", assignee="worker")
        stored = kb.get_task(conn, task_id)

        # Identity checks: the field must be a real bool, not merely falsy.
        assert stored.goal_mode is False
        assert stored.goal_max_turns is None
|
|
|
|
|
|
def test_goal_mode_persists(kanban_home):
    """Explicit goal settings survive a create_task / get_task round trip.

    Creates a task with ``goal_mode=True`` and ``goal_max_turns=7`` and
    expects both values back unchanged.
    """
    task_spec = {
        "title": "open-ended task",
        "assignee": "worker",
        "goal_mode": True,
        "goal_max_turns": 7,
    }
    with kb.connect() as conn:
        task_id = kb.create_task(conn, **task_spec)
        stored = kb.get_task(conn, task_id)

        assert stored.goal_mode is True
        assert stored.goal_max_turns == 7
|
|
|
|
|
|
def test_goal_mode_without_max_turns(kanban_home):
    """Goal mode can be enabled without supplying a turn limit.

    With only ``goal_mode=True`` passed, the stored task is expected to have
    ``goal_mode`` set and ``goal_max_turns`` left as ``None``.
    """
    with kb.connect() as conn:
        task_id = kb.create_task(
            conn,
            title="t",
            assignee="worker",
            goal_mode=True,
        )
        stored = kb.get_task(conn, task_id)

        assert stored.goal_mode is True
        assert stored.goal_max_turns is None
|
|
|
|
|
|
def test_legacy_db_migrates_goal_columns(tmp_path, monkeypatch):
    """A tasks table created without goal columns must gain them on init.

    The test hand-builds a database whose ``tasks`` table lacks ``goal_mode``
    and ``goal_max_turns``, inserts one row, then runs ``kb.init_db()`` and
    checks that both columns now exist and that the pre-existing row reads
    back with ``goal_mode is False`` and ``goal_max_turns is None``.

    The ``kanban_home`` fixture is deliberately not used here: it calls
    ``kb.init_db()`` itself, which would run before the legacy schema exists.
    """
    home = tmp_path / ".hermes"
    home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    db_path = kb.kanban_db_path()
    db_path.parent.mkdir(parents=True, exist_ok=True)

    # Minimal legacy schema: tasks table missing goal_mode / goal_max_turns.
    legacy = sqlite3.connect(db_path)
    try:
        legacy.execute(
            """
            CREATE TABLE tasks (
                id TEXT PRIMARY KEY,
                title TEXT NOT NULL,
                body TEXT,
                assignee TEXT,
                status TEXT NOT NULL DEFAULT 'ready',
                priority INTEGER NOT NULL DEFAULT 0,
                created_by TEXT,
                created_at INTEGER NOT NULL,
                started_at INTEGER,
                completed_at INTEGER,
                workspace_kind TEXT NOT NULL DEFAULT 'scratch',
                workspace_path TEXT,
                claim_lock TEXT,
                claim_expires INTEGER
            )
            """
        )
        legacy.execute(
            "INSERT INTO tasks (id, title, status, priority, created_at, workspace_kind) "
            "VALUES ('legacy1', 'old', 'ready', 0, 1, 'scratch')"
        )
        legacy.commit()
    finally:
        # Close even when the setup SQL raises, so a failing test does not
        # leave an open handle on the database file under tmp_path.
        legacy.close()

    # init_db runs the additive migration.
    kb.init_db()
    with kb.connect() as conn:
        cols = {r["name"] for r in conn.execute("PRAGMA table_info(tasks)")}
        assert "goal_mode" in cols
        assert "goal_max_turns" in cols

        task = kb.get_task(conn, "legacy1")
        # Existing row keeps the safe default.
        assert task.goal_mode is False
        assert task.goal_max_turns is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Spawn env
# ---------------------------------------------------------------------------
|
|
|
|
def test_spawn_sets_goal_env_only_when_enabled(kanban_home, monkeypatch):
    """Spawning a goal-mode task passes the goal settings via the environment.

    ``subprocess.Popen`` is replaced with a stub that records the ``env``
    keyword it receives. For a task created with ``goal_mode=True`` and
    ``goal_max_turns=5`` the recorded environment is expected to contain
    ``HERMES_KANBAN_GOAL_MODE == "1"`` and
    ``HERMES_KANBAN_GOAL_MAX_TURNS == "5"``.
    """
    seen = {}

    class _StubProcess:
        pid = 4242

    def _recording_popen(cmd, **popen_kwargs):
        # Only the environment handed to the child process matters here.
        seen["env"] = popen_kwargs.get("env", {})
        return _StubProcess()

    monkeypatch.setattr("subprocess.Popen", _recording_popen)
    # Avoid the kanban-worker skill probe touching the real skills dir.
    monkeypatch.setattr(kb, "_kanban_worker_skill_available", lambda home: False)

    task_spec = {
        "title": "goal task",
        "assignee": "default",
        "goal_mode": True,
        "goal_max_turns": 5,
    }
    with kb.connect() as conn:
        task_id = kb.create_task(conn, **task_spec)
        goal_task = kb.get_task(conn, task_id)

    kb._default_spawn(goal_task, str(kanban_home))

    child_env = seen["env"]
    assert child_env.get("HERMES_KANBAN_GOAL_MODE") == "1"
    assert child_env.get("HERMES_KANBAN_GOAL_MAX_TURNS") == "5"
|
|
|
|
|
|
def test_spawn_no_goal_env_for_plain_task(kanban_home, monkeypatch):
    """Spawning a task without goal mode leaves the goal env vars unset.

    ``subprocess.Popen`` is replaced with a stub that records the ``env``
    keyword it receives; neither ``HERMES_KANBAN_GOAL_MODE`` nor
    ``HERMES_KANBAN_GOAL_MAX_TURNS`` may appear in it.
    """
    seen = {}

    class _StubProcess:
        pid = 4243

    def _recording_popen(cmd, **popen_kwargs):
        seen["env"] = popen_kwargs.get("env", {})
        return _StubProcess()

    monkeypatch.setattr("subprocess.Popen", _recording_popen)
    monkeypatch.setattr(kb, "_kanban_worker_skill_available", lambda home: False)

    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="plain", assignee="default")
        plain_task = kb.get_task(conn, task_id)

    kb._default_spawn(plain_task, str(kanban_home))

    child_env = seen["env"]
    assert "HERMES_KANBAN_GOAL_MODE" not in child_env
    assert "HERMES_KANBAN_GOAL_MAX_TURNS" not in child_env
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Goal loop logic (callback-injected, no live model)
# ---------------------------------------------------------------------------
|
|
|
|
def _patch_judge(monkeypatch, verdicts):
    """Replace ``goals.judge_goal`` with a stub that replays ``verdicts``.

    Each call to the stub consumes the next scripted verdict; once the script
    is exhausted every further call yields ``"done"``. The stub returns a
    3-tuple ``(verdict, "scripted:<verdict>", False)``.
    """
    # Snapshot the script up front so later mutation of the caller's list
    # cannot change what the stub replays.
    remaining = iter(list(verdicts))

    def _scripted_judge(goal, response, subgoals=None):
        verdict = next(remaining, "done")
        return verdict, f"scripted:{verdict}", False

    monkeypatch.setattr(goals, "judge_goal", _scripted_judge)
|
|
|
|
|
|
def test_loop_stops_when_worker_already_completed(monkeypatch):
    """No extra turns run when the task status is already ``"done"``.

    Expects the ``"completed_by_worker"`` outcome, an untouched turn log, and
    no call to the block callback.
    """
    # Worker called kanban_complete on its first turn — no judging needed.
    _patch_judge(monkeypatch, ["continue"])  # should never be consulted
    prompts = []

    def _run_turn(p):
        prompts.append(p)
        return "x"

    def _status():
        return "done"

    def _block(r):
        pytest.fail("should not block")

    result = goals.run_kanban_goal_loop(
        task_id="t1",
        goal_text="do the thing",
        run_turn=_run_turn,
        task_status_fn=_status,
        block_fn=_block,
        first_response="done already",
    )

    assert result["outcome"] == "completed_by_worker"
    assert prompts == []  # no extra turns
|
|
|
|
|
|
def test_loop_continues_then_worker_completes(monkeypatch):
    """Continuation prompts are fed until the task status turns ``"done"``.

    The scripted judge answers ``"continue"`` twice while the status callback
    yields ``"running"``, ``"running"``, then ``"done"``. Expects the
    ``"completed_by_worker"`` outcome after exactly two extra turns, each
    prompt containing ``"not done yet"``, and no call to the block callback.
    """
    _patch_judge(monkeypatch, ["continue", "continue"])
    status_feed = iter(["running", "running", "done"])
    prompts = []

    def _run_turn(p):
        prompts.append(p)
        # Numbered after the append, so the first extra turn is "turn1".
        return f"turn{len(prompts)}"

    def _status():
        return next(status_feed)

    def _block(r):
        pytest.fail("should not block")

    result = goals.run_kanban_goal_loop(
        task_id="t2",
        goal_text="ship feature",
        run_turn=_run_turn,
        task_status_fn=_status,
        block_fn=_block,
        max_turns=10,
        first_response="started",
    )

    assert result["outcome"] == "completed_by_worker"
    # Two continuation turns fed before the worker completed.
    assert len(prompts) == 2
    assert all("not done yet" in prompt for prompt in prompts)
|
|
|
|
|
|
def test_loop_blocks_on_budget_exhaustion(monkeypatch):
    """A task that never finishes is blocked once ``max_turns`` is used up.

    The judge always answers ``"continue"`` and the status stays
    ``"running"``. With ``max_turns=3`` the test expects the
    ``"blocked_budget"`` outcome, ``turns_used == 3``, and a block reason
    mentioning the turn budget.
    """
    _patch_judge(monkeypatch, ["continue" for _ in range(10)])
    recorded = {}

    def _run_turn(p):
        return "still going"

    def _status():
        return "running"

    def _record_block(reason):
        recorded.update(reason=reason)

    result = goals.run_kanban_goal_loop(
        task_id="t3",
        goal_text="endless task",
        run_turn=_run_turn,
        task_status_fn=_status,
        block_fn=_record_block,
        max_turns=3,
        first_response="turn1",
    )

    assert result["outcome"] == "blocked_budget"
    assert result["turns_used"] == 3
    assert "turn budget" in recorded["reason"].lower()
|
|
|
|
|
|
def test_loop_finalize_nudge_when_judge_done_but_open(monkeypatch):
    """A single finalize nudge is sent when the judge is satisfied early.

    The status callback yields ``"running"`` and then ``"done"``. Expects the
    ``"completed_by_worker"`` outcome after exactly one extra turn whose
    prompt contains ``"still open"``, and no call to the block callback.
    """
    # Judge says done, but worker never terminated → one finalize nudge,
    # then worker completes.
    _patch_judge(monkeypatch, ["done", "done"])
    status_feed = iter(["running", "done"])
    prompts = []

    def _run_turn(p):
        prompts.append(p)
        return "ok"

    def _status():
        return next(status_feed)

    def _block(r):
        pytest.fail("should not block")

    result = goals.run_kanban_goal_loop(
        task_id="t4",
        goal_text="task",
        run_turn=_run_turn,
        task_status_fn=_status,
        block_fn=_block,
        max_turns=10,
        first_response="looks done",
    )

    assert result["outcome"] == "completed_by_worker"
    assert len(prompts) == 1
    assert "still open" in prompts[0]
|
|
|
|
|
|
def test_loop_blocks_when_judge_done_but_never_finalizes(monkeypatch):
    """The card is blocked when the worker ignores the finalize nudge.

    The judge keeps answering ``"done"`` while the status stays
    ``"running"``. Expects the ``"blocked_budget"`` outcome and a block
    reason mentioning "finalize".
    """
    # Judge keeps saying done, worker never calls kanban_complete → block
    # after the single finalize nudge.
    _patch_judge(monkeypatch, ["done", "done"])
    recorded = {}

    def _run_turn(p):
        return "still not finalizing"

    def _status():
        return "running"

    def _record_block(r):
        recorded.update(reason=r)

    result = goals.run_kanban_goal_loop(
        task_id="t5",
        goal_text="task",
        run_turn=_run_turn,
        task_status_fn=_status,
        block_fn=_record_block,
        max_turns=10,
        first_response="looks done",
    )

    assert result["outcome"] == "blocked_budget"
    assert "finalize" in recorded["reason"].lower()
|
|
|
|
|
|
def test_loop_stops_if_task_reclaimed(monkeypatch):
    """The loop exits with ``"stopped"`` when the status is ``"archived"``.

    Neither the turn callback nor the block callback may be invoked; either
    call fails the test.
    """
    _patch_judge(monkeypatch, ["continue"])

    def _run_turn(p):
        pytest.fail("should not run a turn")

    def _status():
        return "archived"

    def _block(r):
        pytest.fail("should not block")

    result = goals.run_kanban_goal_loop(
        task_id="t6",
        goal_text="task",
        run_turn=_run_turn,
        task_status_fn=_status,
        block_fn=_block,
        first_response="x",
    )

    assert result["outcome"] == "stopped"
|