From 0cd7d54b00106d8992803a918ce55d5c205550f3 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 31 May 2026 01:16:33 -0700 Subject: [PATCH] feat(kanban): goal_mode cards run workers in a /goal loop (#35710) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(kanban): goal_mode cards run workers in a /goal loop A goal_mode card wraps its dispatched worker in the Ralph-style goal loop behind /goal: after each turn an auxiliary judge checks the worker's response against the card title+body, and if not done the worker keeps going in the SAME session until the judge agrees, the worker terminates the task itself, or the turn budget runs out (which blocks the card for human review — never a silent exit). - kanban_db: goal_mode + goal_max_turns columns (additive migration), Task fields, create_task params, INSERT wiring, created-event payload. - kanban_tools: goal_mode/goal_max_turns on the kanban_create tool so orchestrators can opt cards in when fanning out. - kanban CLI: --goal / --goal-max-turns on 'kanban create'. - dashboard API: goal_mode/goal_max_turns on the create endpoint (auto-surfaced back via asdict). - _default_spawn: sets HERMES_KANBAN_GOAL_MODE / _GOAL_MAX_TURNS only when the card opts in. - goals.run_kanban_goal_loop: standalone, callback-injected loop engine (no SessionDB persistence; ephemeral worker). cli.py quiet path calls it after the worker's first turn when the env vars are set. - Docs: orchestrator skill + kanban feature page. Tests: DB roundtrip + legacy migration, spawn env gating, and the loop's continuation/completion/budget-block/finalize-nudge branches. E2E run against a real kanban DB confirms a budget-exhausted goal worker lands in a sticky blocked state. 
* feat(kanban/dashboard): goal-mode toggle in the create form Wires the goal_mode card setting into the dashboard UI (the plugin's hand-written IIFE bundle, no build step): - InlineCreate: 'goal mode' checkbox after the skills field; checking it reveals an optional 'max turns' number input. Both reset on submit and only post goal_mode/goal_max_turns when enabled. - TaskDrawer: a 'Goal mode: on (max N turns)' MetaRow so a card's goal-mode setting is visible after creation (auto-fed by asdict via the existing _task_dict). Live-tested through the running dashboard with a browser: created a goal-mode card with max-turns=8, confirmed it persisted to the kanban DB (goal_mode=1, goal_max_turns=8) and rendered back in the drawer as 'on (max 8 turns)'. No JS console errors. --- cli.py | 104 +++++++ hermes_cli/goals.py | 150 +++++++++++ hermes_cli/kanban.py | 15 ++ hermes_cli/kanban_db.py | 59 +++- plugins/kanban/dashboard/dist/index.js | 44 +++ plugins/kanban/dashboard/plugin_api.py | 4 + skills/devops/kanban-orchestrator/SKILL.md | 24 ++ tests/hermes_cli/test_kanban_goal_mode.py | 300 +++++++++++++++++++++ tools/kanban_tools.py | 31 +++ website/docs/user-guide/features/kanban.md | 15 ++ 10 files changed, 744 insertions(+), 2 deletions(-) create mode 100644 tests/hermes_cli/test_kanban_goal_mode.py diff --git a/cli.py b/cli.py index baf033920..1e8229be4 100644 --- a/cli.py +++ b/cli.py @@ -15074,6 +15074,96 @@ class HermesCLI: # Main Entry Point # ============================================================================ +def _run_kanban_goal_loop_q(cli: "HermesCLI", first_response: str) -> None: + """Drive a kanban goal_mode worker through the Ralph-style goal loop. + + Called from the quiet single-query path AFTER the worker's first turn, + only when ``HERMES_KANBAN_GOAL_MODE`` is set (dispatcher-spawned + goal_mode card). Wires the worker's ``run_conversation`` and the kanban + DB into ``goals.run_kanban_goal_loop``. 
All errors are swallowed by the + caller — a broken goal loop must never wedge a worker, the dispatcher's + claim TTL / crash detection is the backstop. + """ + import os as _os + + task_id = (_os.environ.get("HERMES_KANBAN_TASK") or "").strip() + if not task_id: + return + + from hermes_cli import kanban_db as _kb + from hermes_cli.goals import run_kanban_goal_loop as _run_loop, DEFAULT_MAX_TURNS as _DEF_TURNS + + # Resolve goal text from the card (title + body = the acceptance + # criteria the judge evaluates against). + conn = _kb.connect() + try: + task = _kb.get_task(conn, task_id) + finally: + try: + conn.close() + except Exception: + pass + if task is None: + return + + goal_parts = [task.title or ""] + if task.body: + goal_parts.append(task.body) + goal_text = "\n\n".join(p for p in goal_parts if p).strip() + if not goal_text: + return + + max_turns = task.goal_max_turns or _DEF_TURNS + + def _run_turn(prompt: str) -> str: + result = cli.agent.run_conversation( + user_message=prompt, + conversation_history=cli.conversation_history, + ) + # Keep session_id in sync if mid-run compression rotated it. 
+ if ( + getattr(cli.agent, "session_id", None) + and cli.agent.session_id != cli.session_id + ): + cli.session_id = cli.agent.session_id + resp = result.get("final_response", "") if isinstance(result, dict) else str(result) + if resp: + print(resp) + return resp or "" + + def _task_status() -> "str | None": + c = _kb.connect() + try: + t = _kb.get_task(c, task_id) + return t.status if t is not None else None + finally: + try: + c.close() + except Exception: + pass + + def _block(reason: str) -> None: + c = _kb.connect() + try: + _kb.block_task(c, task_id, reason=reason) + finally: + try: + c.close() + except Exception: + pass + + _run_loop( + task_id=task_id, + goal_text=goal_text, + run_turn=_run_turn, + task_status_fn=_task_status, + block_fn=_block, + max_turns=max_turns, + first_response=first_response or "", + log=lambda m: logger.info("%s", m), + ) + + def main( query: str = None, q: str = None, @@ -15471,6 +15561,20 @@ def main( print(f"Error: {result['error']}", file=sys.stderr) elif response: print(response) + + # Kanban goal-loop mode: a worker spawned for a + # goal_mode card keeps working in THIS session until an + # auxiliary judge agrees the card is done, the worker + # terminates the task itself, or the turn budget runs + # out (→ sticky block). Gated on the env vars the + # dispatcher sets in `_default_spawn`; a no-op for every + # normal worker and every non-kanban `-q` run. + if os.environ.get("HERMES_KANBAN_GOAL_MODE") == "1": + try: + _run_kanban_goal_loop_q(cli, response) + except Exception as _goal_exc: + logger.debug("kanban goal loop failed: %s", _goal_exc) + # Session ID goes to stderr so piped stdout is clean. 
print(f"\nsession_id: {cli.session_id}", file=sys.stderr) diff --git a/hermes_cli/goals.py b/hermes_cli/goals.py index d6a139419..a6a28deaf 100644 --- a/hermes_cli/goals.py +++ b/hermes_cli/goals.py @@ -747,6 +747,153 @@ class GoalManager: return CONTINUATION_PROMPT_TEMPLATE.format(goal=self._state.goal) +# ────────────────────────────────────────────────────────────────────── +# Kanban worker goal loop +# ────────────────────────────────────────────────────────────────────── + +# Continuation prompt fed back to a kanban goal-mode worker that has not +# yet completed/blocked its task. The card's own acceptance criteria are +# the goal — the worker already has the full task body in its first turn, +# so we keep this short and point it back at the lifecycle contract. +KANBAN_GOAL_CONTINUATION_TEMPLATE = ( + "[Continuing toward this kanban task — judge says it is not done yet]\n" + "Reason: {reason}\n\n" + "Take the next concrete step toward completing the task. When the work " + "is genuinely finished, call kanban_complete with a summary. If you are " + "blocked and need human input, call kanban_block with a reason. Do not " + "stop without calling one of them." +) + +# Fed when the judge believes the work is done but the worker never called +# kanban_complete / kanban_block. One explicit nudge to terminate the task +# the right way before the loop gives up. +KANBAN_GOAL_FINALIZE_TEMPLATE = ( + "[The work looks complete, but the task is still open]\n" + "Reason: {reason}\n\n" + "If the task is genuinely done, call kanban_complete now with a short " + "summary of what you did. If something still blocks completion, call " + "kanban_block with the reason instead." +) + + +def run_kanban_goal_loop( + *, + task_id: str, + goal_text: str, + run_turn, + task_status_fn, + block_fn, + max_turns: int = DEFAULT_MAX_TURNS, + first_response: str = "", + log=None, +) -> Dict[str, Any]: + """Drive a kanban worker through a Ralph-style goal loop. 
+ + The dispatcher spawns a goal-mode worker exactly like a normal worker + (``hermes -p <profile> chat -q "work kanban task <id>"``). The worker's + first turn has already run by the time this is called; ``first_response`` + is that turn's reply. From here we: + + 1. Check whether the worker already terminated the task (called + ``kanban_complete`` / ``kanban_block``). If so, stop — nothing to do. + 2. Otherwise judge the latest response against ``goal_text`` (the card's + title + body). ``continue`` → feed a continuation prompt and run + another turn IN THE SAME SESSION via ``run_turn``. ``done`` but the + task is still open → one explicit "call kanban_complete" nudge. + 3. When the turn budget is exhausted and the worker still hasn't + terminated the task, ``block_fn`` is invoked so the card lands in a + sticky ``blocked`` state for human review (NOT a silent exit). + + This function performs NO SessionDB persistence — a worker process is + ephemeral, so the turn budget lives in a local counter. It is fully + decoupled from the CLI for testability: callers inject ``run_turn`` + (str -> str), ``task_status_fn`` (() -> str|None), and ``block_fn`` + (reason: str -> None). + + Returns a decision dict: ``{"outcome", "turns_used", "reason"}`` where + outcome is one of ``"completed_by_worker"``, ``"blocked_budget"``, + ``"blocked_by_worker"``, or ``"stopped"``. + """ + + def _log(msg: str) -> None: + if log is not None: + try: + log(msg) + except Exception: + pass + + max_turns = int(max_turns or DEFAULT_MAX_TURNS) + if max_turns < 1: + max_turns = DEFAULT_MAX_TURNS + + last_response = first_response or "" + # The first turn already consumed one unit of budget. + turns_used = 1 + nudged_to_finalize = False + + while True: + # Did the worker terminate the task itself this turn?
+ try: + status = task_status_fn() + except Exception as exc: + _log(f"kanban goal loop: status check failed ({exc}); stopping") + return {"outcome": "stopped", "turns_used": turns_used, "reason": "status check failed"} + + if status == "done": + _log(f"kanban goal loop: task {task_id} completed by worker after {turns_used} turn(s)") + return {"outcome": "completed_by_worker", "turns_used": turns_used, "reason": "worker completed the task"} + if status == "blocked": + _log(f"kanban goal loop: task {task_id} blocked by worker after {turns_used} turn(s)") + return {"outcome": "blocked_by_worker", "turns_used": turns_used, "reason": "worker blocked the task"} + if status not in ("running", "ready"): + # Reclaimed / archived / unexpected — let the dispatcher own it. + _log(f"kanban goal loop: task {task_id} status={status!r}; stopping") + return {"outcome": "stopped", "turns_used": turns_used, "reason": f"status={status}"} + + # Still open — judge whether the latest response satisfies the card. + verdict, reason, _parse_failed = judge_goal(goal_text, last_response) + _log(f"kanban goal loop: turn {turns_used}/{max_turns} verdict={verdict} reason={_truncate(reason, 120)}") + + if verdict == "done": + if nudged_to_finalize: + # Already asked once to call kanban_complete and it still + # didn't — block for review rather than spin. + _log(f"kanban goal loop: task {task_id} judged done but worker won't finalize; blocking") + try: + block_fn( + f"Goal-mode worker's output looked complete but it never " + f"called kanban_complete after a finalize nudge ({reason})." 
+ ) + except Exception as exc: + _log(f"kanban goal loop: block_fn failed ({exc})") + return {"outcome": "blocked_budget", "turns_used": turns_used, "reason": "judged done, never finalized"} + prompt = KANBAN_GOAL_FINALIZE_TEMPLATE.format(reason=_truncate(reason, 400)) + nudged_to_finalize = True + else: + prompt = KANBAN_GOAL_CONTINUATION_TEMPLATE.format(reason=_truncate(reason, 400)) + + # Budget check BEFORE spending another turn. + if turns_used >= max_turns: + _log(f"kanban goal loop: task {task_id} exhausted {turns_used}/{max_turns} turns; blocking") + try: + block_fn( + f"Goal-mode worker exhausted its turn budget " + f"({turns_used}/{max_turns}) without completing the task. " + f"Last judge verdict: {_truncate(reason, 300)}" + ) + except Exception as exc: + _log(f"kanban goal loop: block_fn failed ({exc})") + return {"outcome": "blocked_budget", "turns_used": turns_used, "reason": "turn budget exhausted"} + + # Run another turn in the same session. + try: + last_response = run_turn(prompt) or "" + except Exception as exc: + _log(f"kanban goal loop: run_turn failed ({exc}); stopping") + return {"outcome": "stopped", "turns_used": turns_used, "reason": f"run_turn error: {type(exc).__name__}"} + turns_used += 1 + + __all__ = [ "GoalState", "GoalManager", @@ -754,9 +901,12 @@ __all__ = [ "CONTINUATION_PROMPT_WITH_SUBGOALS_TEMPLATE", "JUDGE_USER_PROMPT_TEMPLATE", "JUDGE_USER_PROMPT_WITH_SUBGOALS_TEMPLATE", + "KANBAN_GOAL_CONTINUATION_TEMPLATE", + "KANBAN_GOAL_FINALIZE_TEMPLATE", "DEFAULT_MAX_TURNS", "load_goal", "save_goal", "clear_goal", "judge_goal", + "run_kanban_goal_loop", ] diff --git a/hermes_cli/kanban.py b/hermes_cli/kanban.py index a6e76fe35..8b67ebc3d 100644 --- a/hermes_cli/kanban.py +++ b/hermes_cli/kanban.py @@ -341,6 +341,19 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu "two retries. 
Omit to use the dispatcher's " "kanban.failure_limit config " f"(default {kb.DEFAULT_FAILURE_LIMIT}).") + p_create.add_argument("--goal", action="store_true", dest="goal_mode", + help="Run the worker in a goal loop: after each " + "turn a judge checks the response against the " + "card title/body and, if not done, the worker " + "keeps going in the same session until the " + "judge agrees it's complete (or the turn " + "budget runs out, which blocks the card for " + "review). Best for open-ended cards one shot " + "rarely finishes.") + p_create.add_argument("--goal-max-turns", type=int, default=None, + metavar="N", dest="goal_max_turns", + help="Turn budget for --goal workers (default 20). " + "Ignored without --goal.") p_create.add_argument("--initial-status", choices=sorted(kb.VALID_INITIAL_STATUSES), default="running", @@ -1343,6 +1356,8 @@ def _cmd_create(args: argparse.Namespace) -> int: max_runtime_seconds=max_runtime, skills=getattr(args, "skills", None) or None, max_retries=max_retries, + goal_mode=bool(getattr(args, "goal_mode", False)), + goal_max_turns=getattr(args, "goal_max_turns", None), initial_status=getattr(args, "initial_status", "running"), ) task = kb.get_task(conn, task_id) diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py index 471165524..3bb14573e 100644 --- a/hermes_cli/kanban_db.py +++ b/hermes_cli/kanban_db.py @@ -725,6 +725,19 @@ class Task: # ``kanban.failure_limit`` config, and then to ``DEFAULT_FAILURE_LIMIT``. # Name matches the ``--max-retries`` CLI flag on ``kanban create``. max_retries: Optional[int] = None + # When True, the dispatched worker runs in a Ralph-style goal loop + # (the same engine behind the ``/goal`` slash command): after each + # turn an auxiliary judge model evaluates the worker's response + # against this card's title/body (treated as the goal). 
If the judge + # says "not done" and budget remains, the worker is fed a + # continuation prompt IN THE SAME SESSION and keeps working until the + # judge agrees, the goal-turn budget is exhausted (→ kanban_block), + # or the worker explicitly blocks/completes. ``False`` (default) = + # the classic single-shot worker. ``goal_max_turns`` bounds the loop. + goal_mode: bool = False + # Goal-loop turn budget for ``goal_mode`` workers. ``None`` falls + # through to the goals engine default (``goals.DEFAULT_MAX_TURNS``). + goal_max_turns: Optional[int] = None # Originating chat/agent session id, when the task was created from # within an agent loop that propagated ``HERMES_SESSION_ID``. NULL for # tasks created from the CLI, the dashboard, or any path that doesn't @@ -797,6 +810,12 @@ class Task: max_retries=( row["max_retries"] if "max_retries" in keys else None ), + goal_mode=( + bool(row["goal_mode"]) if "goal_mode" in keys and row["goal_mode"] else False + ), + goal_max_turns=( + row["goal_max_turns"] if "goal_max_turns" in keys and row["goal_max_turns"] else None + ), session_id=( row["session_id"] if "session_id" in keys else None ), @@ -946,6 +965,16 @@ CREATE TABLE IF NOT EXISTS tasks ( -- case) falls through to the dispatcher-level ``kanban.failure_limit`` -- config and then ``DEFAULT_FAILURE_LIMIT``. max_retries INTEGER, + -- When 1, the dispatched worker runs in a Ralph-style goal loop: an + -- auxiliary judge re-evaluates the worker's response against the + -- card title/body after each turn and feeds a continuation prompt + -- back into the SAME session until the judge agrees the work is done + -- or ``goal_max_turns`` is exhausted. 0 = classic single-shot + -- worker (the default). + goal_mode INTEGER NOT NULL DEFAULT 0, + -- Goal-loop turn budget for ``goal_mode`` workers. NULL = use the + -- goals-engine default.
+ goal_max_turns INTEGER, -- Originating chat/agent session id when the task was created from -- inside an agent loop that propagated ``HERMES_SESSION_ID``. NULL -- for tasks created from the CLI, dashboard, or any path that doesn't @@ -1584,6 +1613,20 @@ def _migrate_add_optional_columns(conn: sqlite3.Connection) -> None: if "model_override" not in cols: conn.execute("ALTER TABLE tasks ADD COLUMN model_override TEXT") + if "goal_mode" not in cols: + # Ralph-style goal loop toggle for the dispatched worker. 0 (the + # default) = classic single-shot worker, preserving the behaviour + # existing rows had before the column existed. + _add_column_if_missing( + conn, "tasks", "goal_mode", "goal_mode INTEGER NOT NULL DEFAULT 0" + ) + + if "goal_max_turns" not in cols: + # Per-task goal-loop turn budget. NULL = goals-engine default. + _add_column_if_missing( + conn, "tasks", "goal_max_turns", "goal_max_turns INTEGER" + ) + if "session_id" not in cols: # Originating agent/chat session id, populated when the task is # created from within an agent loop that propagated @@ -1967,6 +2010,8 @@ def create_task( max_runtime_seconds: Optional[int] = None, skills: Optional[Iterable[str]] = None, max_retries: Optional[int] = None, + goal_mode: bool = False, + goal_max_turns: Optional[int] = None, initial_status: str = "running", session_id: Optional[str] = None, board: Optional[str] = None, @@ -2134,8 +2179,8 @@ def create_task( id, title, body, assignee, status, priority, created_by, created_at, workspace_kind, workspace_path, branch_name, tenant, idempotency_key, max_runtime_seconds, - skills, max_retries, session_id - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + skills, max_retries, goal_mode, goal_max_turns, session_id + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""", ( task_id, @@ -2154,6 +2199,8 @@ def create_task( int(max_runtime_seconds) if max_runtime_seconds is not None else None, json.dumps(skills_list) if skills_list is not None else None, int(max_retries) if max_retries is not None else None, + 1 if goal_mode else 0, + int(goal_max_turns) if goal_max_turns is not None else None, session_id, ), ) @@ -2173,6 +2220,7 @@ def create_task( "tenant": tenant, "branch_name": branch_name, "skills": list(skills_list) if skills_list else None, + "goal_mode": bool(goal_mode) or None, }, ) return task_id @@ -6412,6 +6460,13 @@ def _default_spawn( env["HERMES_KANBAN_RUN_ID"] = str(task.current_run_id) if task.claim_lock: env["HERMES_KANBAN_CLAIM_LOCK"] = task.claim_lock + # Goal-loop mode: the worker reads these and wraps its run in the + # Ralph-style /goal judge loop (see cli.py quiet-mode path). Only set + # when enabled so non-goal tasks keep a clean env. + if task.goal_mode: + env["HERMES_KANBAN_GOAL_MODE"] = "1" + if task.goal_max_turns is not None: + env["HERMES_KANBAN_GOAL_MAX_TURNS"] = str(int(task.goal_max_turns)) terminal_timeout = _worker_terminal_timeout_env( task.max_runtime_seconds, env.get("TERMINAL_TIMEOUT"), diff --git a/plugins/kanban/dashboard/dist/index.js b/plugins/kanban/dashboard/dist/index.js index c22c06c12..451f3a011 100644 --- a/plugins/kanban/dashboard/dist/index.js +++ b/plugins/kanban/dashboard/dist/index.js @@ -2600,6 +2600,13 @@ // input here to save vertical space in the common `scratch` case. const [workspaceKind, setWorkspaceKind] = useState("scratch"); const [workspacePath, setWorkspacePath] = useState(""); + // Goal-mode: when on, the dispatched worker runs the Ralph-style /goal + // loop — a judge re-checks the card after each turn and the worker keeps + // going in the same session until done, or the turn budget runs out + // (which blocks the card for review). goalMaxTurns is optional; blank + // = backend default. 
+ const [goalMode, setGoalMode] = useState(false); + const [goalMaxTurns, setGoalMaxTurns] = useState(""); const submit = function () { const trimmed = title.trim(); @@ -2626,9 +2633,17 @@ } const wpTrim = workspacePath.trim(); if (wpTrim) body.workspace_path = wpTrim; + // Goal-mode toggle. Only send the keys when enabled so the request + // shape stays small and old dispatchers ignore it cleanly. + if (goalMode) { + body.goal_mode = true; + const gmt = parseInt(goalMaxTurns, 10); + if (Number.isFinite(gmt) && gmt > 0) body.goal_max_turns = gmt; + } props.onSubmit(body); setTitle(""); setAssignee(""); setPriority(0); setParent(""); setSkills(""); setWorkspaceKind("scratch"); setWorkspacePath(""); + setGoalMode(false); setGoalMaxTurns(""); }; const showPathInput = workspaceKind !== "scratch"; @@ -2685,6 +2700,29 @@ title: "Force-load these skills into the worker (in addition to the built-in kanban-worker).", className: "h-7 text-xs", }), + h("div", { className: "flex gap-2 items-center" }, + h("label", { + className: "flex items-center gap-1.5 text-xs cursor-pointer select-none", + title: "Goal mode: the worker keeps going in the same session until a judge agrees the card is done (or the turn budget runs out, which blocks it for review). Best for open-ended cards one shot rarely finishes.", + }, + h("input", { + type: "checkbox", + checked: goalMode, + onChange: function (e) { setGoalMode(!!e.target.checked); }, + className: "h-3.5 w-3.5 accent-current", + }), + tx(t, "goalMode", "goal mode"), + ), + goalMode ? h(Input, { + type: "number", + value: goalMaxTurns, + onChange: function (e) { setGoalMaxTurns(e.target.value); }, + placeholder: tx(t, "goalMaxTurns", "max turns (default 20)"), + className: "h-7 text-xs w-40", + title: "Turn budget for the goal loop. 
Blank = backend default (20).", + min: 1, + }) : null, + ), h("div", { className: "flex gap-2" }, h(Select, Object.assign({ value: workspaceKind, @@ -3161,6 +3199,12 @@ label: tx(i18n, "skills", "Skills"), value: t.skills.join(", "), }) : null, + t.goal_mode ? h(MetaRow, { + label: tx(i18n, "goalMode", "Goal mode"), + value: t.goal_max_turns + ? `on (max ${t.goal_max_turns} turns)` + : "on", + }) : null, t.created_by ? h(MetaRow, { label: tx(i18n, "createdBy", "Created by"), value: t.created_by }) : null, ), h(StatusActions, { diff --git a/plugins/kanban/dashboard/plugin_api.py b/plugins/kanban/dashboard/plugin_api.py index 0c2122c2a..2d792622f 100644 --- a/plugins/kanban/dashboard/plugin_api.py +++ b/plugins/kanban/dashboard/plugin_api.py @@ -581,6 +581,8 @@ class CreateTaskBody(BaseModel): idempotency_key: Optional[str] = None max_runtime_seconds: Optional[int] = None skills: Optional[list[str]] = None + goal_mode: bool = False + goal_max_turns: Optional[int] = None @router.post("/tasks") @@ -603,6 +605,8 @@ def create_task(payload: CreateTaskBody, board: Optional[str] = Query(None)): idempotency_key=payload.idempotency_key, max_runtime_seconds=payload.max_runtime_seconds, skills=payload.skills, + goal_mode=payload.goal_mode, + goal_max_turns=payload.goal_max_turns, ) task = kanban_db.get_task(conn, task_id) body: dict[str, Any] = {"task": _task_dict(task) if task else None} diff --git a/skills/devops/kanban-orchestrator/SKILL.md b/skills/devops/kanban-orchestrator/SKILL.md index 25f634205..760f83071 100644 --- a/skills/devops/kanban-orchestrator/SKILL.md +++ b/skills/devops/kanban-orchestrator/SKILL.md @@ -178,6 +178,30 @@ Tell them what you created in plain prose, naming the actual profiles you used: **Tenant inheritance.** If `HERMES_TENANT` is set in your env, pass `tenant=os.environ.get("HERMES_TENANT")` on every `kanban_create` call so child tasks stay in the same namespace. 
+## Goal-mode cards (persistent workers) + +By default a dispatched worker gets **one shot** at its card: it does its work, calls `kanban_complete`/`kanban_block`, and exits. For open-ended cards where one turn rarely finishes the job, pass `goal_mode=True` to wrap that worker in a Ralph-style goal loop — the same engine behind the `/goal` slash command: + +```python +kanban_create( + title="Translate the full docs site to French", + body="Acceptance: every page translated, no English left, links intact.", + assignee="", + goal_mode=True, # judge re-checks the card after each turn + goal_max_turns=15, # optional budget (default 20) +)["task_id"] +``` + +How it behaves: +- After each worker turn, an auxiliary judge evaluates the worker's response against the card's **title + body** (treated as the acceptance criteria). +- Not done + budget remains → the worker keeps going **in the same session** (full context retained — not a fresh respawn). +- Worker calls `kanban_complete`/`kanban_block` itself → loop stops, normal lifecycle. +- Budget exhausted without completion → the card is **blocked** for human review (sticky), never a silent exit. + +When to use it: long, multi-step, or "keep going until X is true" cards. When NOT to: cheap one-shot cards (translation of a single string, a quick lookup) — the judge overhead isn't worth it, and the dispatcher's existing retry/circuit-breaker already handles transient worker failures. + +Write the body as **explicit acceptance criteria** — the judge is only as good as the goal text. "Translate the README" is weaker than "Translate every section of the README to French; no English sentences remain." + ## Recovering stuck workers When a worker profile keeps crashing, hallucinating, or getting blocked by its own mistakes (usually: wrong model, missing skill, broken credential), the kanban dashboard flags the task with a ⚠ badge and opens a **Recovery** section in the drawer. 
Three primary actions: diff --git a/tests/hermes_cli/test_kanban_goal_mode.py b/tests/hermes_cli/test_kanban_goal_mode.py new file mode 100644 index 000000000..173174374 --- /dev/null +++ b/tests/hermes_cli/test_kanban_goal_mode.py @@ -0,0 +1,300 @@ +"""Tests for kanban goal_mode — per-card Ralph-style goal loop. + +Covers three layers: + +1. DB: goal_mode / goal_max_turns persist through create_task + from_row, + and a legacy DB (without the columns) migrates cleanly. +2. Spawn: _default_spawn sets the HERMES_KANBAN_GOAL_MODE env vars only + when the card opts in. +3. Loop: goals.run_kanban_goal_loop continuation / completion / budget + behaviour, driven entirely through injected callbacks (no live model). +""" + +from __future__ import annotations + +import sqlite3 +from pathlib import Path + +import pytest + +from hermes_cli import kanban_db as kb +from hermes_cli import goals + + +@pytest.fixture +def kanban_home(tmp_path, monkeypatch): + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + kb.init_db() + return home + + +# --------------------------------------------------------------------------- +# DB layer +# --------------------------------------------------------------------------- + +def test_goal_mode_defaults_off(kanban_home): + with kb.connect() as conn: + tid = kb.create_task(conn, title="plain task", assignee="worker") + task = kb.get_task(conn, tid) + assert task.goal_mode is False + assert task.goal_max_turns is None + + +def test_goal_mode_persists(kanban_home): + with kb.connect() as conn: + tid = kb.create_task( + conn, + title="open-ended task", + assignee="worker", + goal_mode=True, + goal_max_turns=7, + ) + task = kb.get_task(conn, tid) + assert task.goal_mode is True + assert task.goal_max_turns == 7 + + +def test_goal_mode_without_max_turns(kanban_home): + with kb.connect() as conn: + tid = kb.create_task( + conn, title="t", assignee="worker", 
goal_mode=True + ) + task = kb.get_task(conn, tid) + assert task.goal_mode is True + assert task.goal_max_turns is None + + +def test_legacy_db_migrates_goal_columns(tmp_path, monkeypatch): + """A tasks table created without goal columns must gain them on init.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + + db_path = kb.kanban_db_path() + db_path.parent.mkdir(parents=True, exist_ok=True) + # Minimal legacy schema: tasks table missing goal_mode / goal_max_turns. + legacy = sqlite3.connect(db_path) + legacy.execute( + """ + CREATE TABLE tasks ( + id TEXT PRIMARY KEY, + title TEXT NOT NULL, + body TEXT, + assignee TEXT, + status TEXT NOT NULL DEFAULT 'ready', + priority INTEGER NOT NULL DEFAULT 0, + created_by TEXT, + created_at INTEGER NOT NULL, + started_at INTEGER, + completed_at INTEGER, + workspace_kind TEXT NOT NULL DEFAULT 'scratch', + workspace_path TEXT, + claim_lock TEXT, + claim_expires INTEGER + ) + """ + ) + legacy.execute( + "INSERT INTO tasks (id, title, status, priority, created_at, workspace_kind) " + "VALUES ('legacy1', 'old', 'ready', 0, 1, 'scratch')" + ) + legacy.commit() + legacy.close() + + # init_db runs the additive migration. + kb.init_db() + with kb.connect() as conn: + cols = {r["name"] for r in conn.execute("PRAGMA table_info(tasks)")} + assert "goal_mode" in cols + assert "goal_max_turns" in cols + task = kb.get_task(conn, "legacy1") + # Existing row keeps the safe default. 
+ assert task.goal_mode is False + assert task.goal_max_turns is None
+
+
+# ---------------------------------------------------------------------------
+# Spawn env
+# ---------------------------------------------------------------------------
+
+def test_spawn_sets_goal_env_only_when_enabled(kanban_home, monkeypatch):
+    captured = {}
+
+    class _FakeProc:
+        pid = 4242
+
+    def _fake_popen(cmd, **kwargs):
+        captured["env"] = kwargs.get("env", {})  # record the env handed to Popen
+        return _FakeProc()
+
+    monkeypatch.setattr("subprocess.Popen", _fake_popen)
+    # Avoid the kanban-worker skill probe touching the real skills dir.
+    monkeypatch.setattr(kb, "_kanban_worker_skill_available", lambda home: False)
+
+    with kb.connect() as conn:
+        tid = kb.create_task(
+            conn,
+            title="goal task",
+            assignee="default",
+            goal_mode=True,
+            goal_max_turns=5,
+        )
+        task = kb.get_task(conn, tid)
+
+    kb._default_spawn(task, str(kanban_home))
+    env = captured["env"]
+    assert env.get("HERMES_KANBAN_GOAL_MODE") == "1"
+    assert env.get("HERMES_KANBAN_GOAL_MAX_TURNS") == "5"
+
+
+def test_spawn_no_goal_env_for_plain_task(kanban_home, monkeypatch):
+    captured = {}
+
+    class _FakeProc:
+        pid = 4243
+
+    def _fake_popen(cmd, **kwargs):
+        captured["env"] = kwargs.get("env", {})  # record the env handed to Popen
+        return _FakeProc()
+
+    monkeypatch.setattr("subprocess.Popen", _fake_popen)
+    monkeypatch.setattr(kb, "_kanban_worker_skill_available", lambda home: False)
+
+    with kb.connect() as conn:
+        tid = kb.create_task(conn, title="plain", assignee="default")
+        task = kb.get_task(conn, tid)
+
+    kb._default_spawn(task, str(kanban_home))
+    env = captured["env"]
+    assert "HERMES_KANBAN_GOAL_MODE" not in env
+    assert "HERMES_KANBAN_GOAL_MAX_TURNS" not in env
+
+
+# ---------------------------------------------------------------------------
+# Goal loop logic (callback-injected, no live model)
+# ---------------------------------------------------------------------------
+
+def _patch_judge(monkeypatch, verdicts):
+    """Make judge_goal return a scripted sequence of verdicts."""
+    seq = list(verdicts)
+
+    def _fake_judge(goal, response, subgoals=None):
+        v = seq.pop(0) if seq else "done"  # scripted verdicts first, then "done" once exhausted
+        return v, f"scripted:{v}", False
+
+    monkeypatch.setattr(goals, "judge_goal", _fake_judge)
+
+
+def test_loop_stops_when_worker_already_completed(monkeypatch):
+    # Worker called kanban_complete on its first turn — no judging needed.
+    _patch_judge(monkeypatch, ["continue"])  # should never be consulted
+    turns = []
+
+    res = goals.run_kanban_goal_loop(
+        task_id="t1",
+        goal_text="do the thing",
+        run_turn=lambda p: turns.append(p) or "x",
+        task_status_fn=lambda: "done",
+        block_fn=lambda r: pytest.fail("should not block"),
+        first_response="done already",
+    )
+    assert res["outcome"] == "completed_by_worker"
+    assert turns == []  # no extra turns
+
+
+def test_loop_continues_then_worker_completes(monkeypatch):
+    _patch_judge(monkeypatch, ["continue", "continue"])
+    statuses = iter(["running", "running", "done"])  # consumed one per task_status_fn() call
+    turns = []
+
+    res = goals.run_kanban_goal_loop(
+        task_id="t2",
+        goal_text="ship feature",
+        run_turn=lambda p: turns.append(p) or f"turn{len(turns)}",
+        task_status_fn=lambda: next(statuses),
+        block_fn=lambda r: pytest.fail("should not block"),
+        max_turns=10,
+        first_response="started",
+    )
+    assert res["outcome"] == "completed_by_worker"
+    # Two continuation turns fed before the worker completed.
+    assert len(turns) == 2
+    assert all("not done yet" in p for p in turns)
+
+
+def test_loop_blocks_on_budget_exhaustion(monkeypatch):
+    _patch_judge(monkeypatch, ["continue"] * 10)
+    blocked = {}
+
+    def _block(reason):
+        blocked["reason"] = reason
+
+    res = goals.run_kanban_goal_loop(
+        task_id="t3",
+        goal_text="endless task",
+        run_turn=lambda p: "still going",
+        task_status_fn=lambda: "running",
+        block_fn=_block,
+        max_turns=3,
+        first_response="turn1",
+    )
+    assert res["outcome"] == "blocked_budget"
+    assert res["turns_used"] == 3
+    assert "turn budget" in blocked["reason"].lower()
+
+
+def test_loop_finalize_nudge_when_judge_done_but_open(monkeypatch):
+    # Judge says done, but worker never terminated → one finalize nudge,
+    # then worker completes.
+    _patch_judge(monkeypatch, ["done", "done"])
+    statuses = iter(["running", "done"])  # consumed one per task_status_fn() call
+    turns = []
+
+    res = goals.run_kanban_goal_loop(
+        task_id="t4",
+        goal_text="task",
+        run_turn=lambda p: turns.append(p) or "ok",
+        task_status_fn=lambda: next(statuses),
+        block_fn=lambda r: pytest.fail("should not block"),
+        max_turns=10,
+        first_response="looks done",
+    )
+    assert res["outcome"] == "completed_by_worker"
+    assert len(turns) == 1
+    assert "still open" in turns[0]
+
+
+def test_loop_blocks_when_judge_done_but_never_finalizes(monkeypatch):
+    # Judge keeps saying done, worker never calls kanban_complete → block
+    # after the single finalize nudge.
+    _patch_judge(monkeypatch, ["done", "done"])
+    blocked = {}
+
+    res = goals.run_kanban_goal_loop(
+        task_id="t5",
+        goal_text="task",
+        run_turn=lambda p: "still not finalizing",
+        task_status_fn=lambda: "running",
+        block_fn=lambda r: blocked.update(reason=r),  # capture the block reason for the assert below
+        max_turns=10,
+        first_response="looks done",
+    )
+    assert res["outcome"] == "blocked_budget"
+    assert "finalize" in blocked["reason"].lower()
+
+
+def test_loop_stops_if_task_reclaimed(monkeypatch):
+    _patch_judge(monkeypatch, ["continue"])
+    res = goals.run_kanban_goal_loop(
+        task_id="t6",
+        goal_text="task",
+        run_turn=lambda p: pytest.fail("should not run a turn"),
+        task_status_fn=lambda: "archived",
+        block_fn=lambda r: pytest.fail("should not block"),
+        first_response="x",
+    )
+    assert res["outcome"] == "stopped"
diff --git a/tools/kanban_tools.py b/tools/kanban_tools.py index d3493f0f8..3b4ede304 100644 --- a/tools/kanban_tools.py +++ b/tools/kanban_tools.py @@ -759,6 +759,10 @@ def _handle_create(args: dict, **kw) -> str: return tool_error( f"skills must be a list of skill names, got {type(skills).__name__}" ) + goal_mode, goal_bool_error = _parse_bool_arg(args, "goal_mode") + if goal_bool_error: + return tool_error(goal_bool_error) + goal_max_turns = args.get("goal_max_turns") if isinstance(parents, str): parents = [parents] if not isinstance(parents, (list, tuple)): @@ -786,6 +790,10 @@ def _handle_create(args: dict, **kw) -> str: if max_runtime_seconds is not None else None ), skills=skills, + goal_mode=goal_mode, + goal_max_turns=( + int(goal_max_turns) if goal_max_turns is not None else None + ), initial_status=str(initial_status), created_by=os.environ.get("HERMES_PROFILE") or "worker", session_id=session_id, @@ -1250,6 +1258,29 @@ KANBAN_CREATE_SCHEMA = { "assignee's profile." ), }, + "goal_mode": { + "type": "boolean", + "description": ( + "Run the dispatched worker in a goal loop. 
When true, " + "after each turn an auxiliary judge checks the worker's " + "response against this card's title/body; if the work " + "isn't done and budget remains, the worker keeps going " + "in the same session until the judge agrees it's " + "complete (or the goal-turn budget is exhausted, which " + "blocks the task for human review). Use this for " + "open-ended cards where one shot rarely finishes the " + "work. Defaults to false (classic single-shot worker)." + ), + }, + "goal_max_turns": { + "type": "integer", + "description": ( + "Turn budget for goal_mode workers. Caps how many " + "continuation turns the worker may take before the task " + "is blocked for review. Ignored unless goal_mode is " + "true. Defaults to the goal-engine default (20)." + ), + }, "board": _board_schema_prop(), }, "required": ["title", "assignee"], diff --git a/website/docs/user-guide/features/kanban.md b/website/docs/user-guide/features/kanban.md index 0192f9c64..4c8ae55e8 100644 --- a/website/docs/user-guide/features/kanban.md +++ b/website/docs/user-guide/features/kanban.md @@ -428,6 +428,20 @@ hermes kanban create "audit auth flow" \ These skills are **additive** to the built-in `kanban-worker` — the dispatcher emits one `--skills <name>` flag for each (and for the built-in), so the worker spawns with all of them loaded. The skill names must match skills that are actually installed on the assignee's profile (run `hermes skills list` to see what's available); there's no runtime install. +### Goal-mode cards (`--goal`) + +By default each worker gets **one shot** at its card — do the work, call `kanban_complete`/`kanban_block`, exit. 
Pass `--goal` (CLI) or `goal_mode=True` (the `kanban_create` tool / dashboard) to instead run that worker in a **goal loop**, the same Ralph-style engine behind the `/goal` slash command: after every turn an auxiliary judge checks the worker's output against the card's title + body (treated as the acceptance criteria), and if the work isn't done — and the turn budget remains — the worker keeps going **in the same session** until the judge agrees, the worker terminates the task itself, or the budget runs out (which **blocks** the card for human review rather than exiting silently). + +```bash +hermes kanban create "Translate the docs site to French" \ + --body "Acceptance: every page translated, no English left, links intact." \ + --assignee linguist \ + --goal \ + --goal-max-turns 15 # optional; default 20 +``` + +Use it for open-ended, multi-step, or "keep going until X is true" cards. Skip it for cheap one-shot work — the per-turn judge overhead isn't worth it, and the dispatcher's existing retry/circuit-breaker already handles transient worker failures. The judge is only as good as your goal text, so write the body as **explicit acceptance criteria**. + ### The orchestrator skill A **well-behaved orchestrator does not do the work itself.** It decomposes the user's goal into tasks, links them, assigns each to one of the profiles you've set up, and steps back. The `kanban-orchestrator` skill encodes this as tool-call patterns: anti-temptation rules, a Step-0 profile-discovery prompt (the dispatcher silently fails on unknown assignee names, so the orchestrator must ground every card in profiles that actually exist on your machine), and a decomposition playbook keyed on `kanban_create` / `kanban_link` / `kanban_comment`. @@ -632,6 +646,7 @@ hermes kanban create "<title>" [--body ...] [--assignee <profile>] [--priority N] [--triage] [--idempotency-key KEY] [--max-runtime 30m|2h|1d|<seconds>] [--max-retries N] + [--goal] [--goal-max-turns N] [--skill <name>]... 
[--json] hermes kanban list [--mine] [--assignee P] [--status S] [--tenant T] [--archived]