def _run_kanban_goal_loop_q(cli: "HermesCLI", first_response: str) -> None:
    """Drive a kanban goal_mode worker through the Ralph-style goal loop.

    Called from the quiet single-query path AFTER the worker's first turn,
    only when ``HERMES_KANBAN_GOAL_MODE`` is set (dispatcher-spawned
    goal_mode card). Wires the worker's ``run_conversation`` and the kanban
    DB into ``goals.run_kanban_goal_loop``.

    Nothing is caught here on purpose: the caller wraps this call and
    swallows every error, because a broken goal loop must never wedge a
    worker — the dispatcher's claim TTL / crash detection is the backstop.

    Args:
        cli: The CLI object owning ``agent``, ``conversation_history`` and
            ``session_id`` for the current worker session.
        first_response: The reply produced by the worker's first turn;
            ``None``/empty is passed to the loop as ``""``.

    Returns:
        None. Returns early (before importing any kanban module) when
        ``HERMES_KANBAN_TASK`` is unset or blank, and also when the task
        cannot be loaded or the card has neither a title nor a body.
    """
    import os as _os

    task_id = (_os.environ.get("HERMES_KANBAN_TASK") or "").strip()
    if not task_id:
        return

    from hermes_cli import kanban_db as _kb
    from hermes_cli.goals import (
        DEFAULT_MAX_TURNS as _DEF_TURNS,
        run_kanban_goal_loop as _run_loop,
    )

    def _with_conn(fn):
        """Run ``fn(conn)`` on a fresh kanban DB connection, then close it.

        A failure while closing is ignored (best effort) so it can never
        mask the result or the exception coming out of ``fn``.
        """
        conn = _kb.connect()
        try:
            return fn(conn)
        finally:
            try:
                conn.close()
            except Exception:
                pass

    # Resolve goal text from the card (title + body = the acceptance
    # criteria the judge evaluates against).
    task = _with_conn(lambda conn: _kb.get_task(conn, task_id))
    if task is None:
        return

    goal_text = "\n\n".join(
        part for part in (task.title or "", task.body or "") if part
    ).strip()
    if not goal_text:
        return

    # A missing (or zero) per-card budget falls back to the goals-engine default.
    max_turns = task.goal_max_turns or _DEF_TURNS

    def _run_turn(prompt: str) -> str:
        """Run one more worker turn in the current session; return its reply."""
        result = cli.agent.run_conversation(
            user_message=prompt,
            conversation_history=cli.conversation_history,
        )
        # Carry the transcript forward so the NEXT turn sees this one;
        # otherwise every continuation would replay only the pre-loop
        # history. NOTE(review): assumes run_conversation returns the
        # updated transcript under "messages" — confirm. No-op when absent.
        if isinstance(result, dict):
            transcript = result.get("messages")
            if isinstance(transcript, list) and transcript:
                cli.conversation_history = transcript
        # Keep session_id in sync if mid-run compression rotated it.
        if (
            getattr(cli.agent, "session_id", None)
            and cli.agent.session_id != cli.session_id
        ):
            cli.session_id = cli.agent.session_id
        resp = result.get("final_response", "") if isinstance(result, dict) else str(result)
        if resp:
            print(resp)
        return resp or ""

    def _task_status() -> "str | None":
        """Return the card's current status, or None if the card is gone."""

        def _read(conn):
            current = _kb.get_task(conn, task_id)
            return current.status if current is not None else None

        return _with_conn(_read)

    def _block(reason: str) -> None:
        """Move the card to ``blocked`` with the given reason."""
        _with_conn(lambda conn: _kb.block_task(conn, task_id, reason=reason))

    _run_loop(
        task_id=task_id,
        goal_text=goal_text,
        run_turn=_run_turn,
        task_status_fn=_task_status,
        block_fn=_block,
        max_turns=max_turns,
        first_response=first_response or "",
        log=lambda m: logger.info("%s", m),
    )
# ──────────────────────────────────────────────────────────────────────
# Kanban worker goal loop
# ──────────────────────────────────────────────────────────────────────

# Continuation prompt fed back to a kanban goal-mode worker that has not
# yet completed/blocked its task. The card's own acceptance criteria are
# the goal — the worker already has the full task body in its first turn,
# so we keep this short and point it back at the lifecycle contract.
KANBAN_GOAL_CONTINUATION_TEMPLATE = (
    "[Continuing toward this kanban task — judge says it is not done yet]\n"
    "Reason: {reason}\n\n"
    "Take the next concrete step toward completing the task. When the work "
    "is genuinely finished, call kanban_complete with a summary. If you are "
    "blocked and need human input, call kanban_block with a reason. Do not "
    "stop without calling one of them."
)

# Fed when the judge believes the work is done but the worker never called
# kanban_complete / kanban_block. One explicit nudge to terminate the task
# the right way before the loop gives up.
KANBAN_GOAL_FINALIZE_TEMPLATE = (
    "[The work looks complete, but the task is still open]\n"
    "Reason: {reason}\n\n"
    "If the task is genuinely done, call kanban_complete now with a short "
    "summary of what you did. If something still blocks completion, call "
    "kanban_block with the reason instead."
)


def run_kanban_goal_loop(
    *,
    task_id: str,
    goal_text: str,
    run_turn,
    task_status_fn,
    block_fn,
    max_turns: int = DEFAULT_MAX_TURNS,
    first_response: str = "",
    log=None,
) -> Dict[str, Any]:
    """Drive a kanban worker through a Ralph-style goal loop.

    The worker's first turn has already run by the time this is called;
    ``first_response`` is that turn's reply and counts as one unit of the
    turn budget. From there, each iteration:

    1. Checks whether the worker already terminated the task (status
       ``done`` or ``blocked``). If so, stop — nothing to do. Any status
       other than ``running`` / ``ready`` also stops the loop and leaves
       the card to the dispatcher.
    2. Otherwise judges the latest response against ``goal_text`` (the
       card's title + body). A verdict other than ``done`` feeds a
       continuation prompt and runs another turn IN THE SAME SESSION via
       ``run_turn``. ``done`` while the task is still open earns one
       explicit "call kanban_complete" nudge; a second ``done`` after that
       nudge blocks the card.
    3. When the turn budget is exhausted and the worker still hasn't
       terminated the task, ``block_fn`` is invoked so the card lands in
       ``blocked`` for human review (NOT a silent exit).

    This function performs no persistence of its own: the turn budget
    lives in a local counter, and all I/O goes through the injected
    callables, which keeps it decoupled from the CLI for testability.

    Args:
        task_id: Card identifier; used in log messages only.
        goal_text: Acceptance criteria handed to the judge.
        run_turn: ``(prompt: str) -> str``; runs one more worker turn.
        task_status_fn: ``() -> str | None``; current card status.
        block_fn: ``(reason: str) -> None``; blocks the card.
        max_turns: Total turn budget including the first turn. Falsy,
            non-numeric or ``< 1`` values fall back to
            ``DEFAULT_MAX_TURNS``.
        first_response: Reply of the already-completed first turn.
        log: Optional ``(msg: str) -> None`` sink; its failures are ignored.

    Returns:
        A decision dict ``{"outcome", "turns_used", "reason"}`` where
        outcome is one of ``"completed_by_worker"``, ``"blocked_budget"``,
        ``"blocked_by_worker"``, or ``"stopped"``. Failures of the status
        check, the judge, or ``run_turn`` yield ``"stopped"`` rather than
        propagating an exception.
    """

    def _log(msg: str) -> None:
        # Logging is strictly best effort; a broken sink must not end the loop.
        if log is not None:
            try:
                log(msg)
            except Exception:
                pass

    def _decision(outcome: str, reason: str) -> Dict[str, Any]:
        # Reads ``turns_used`` at call time, so it always reports the live count.
        return {"outcome": outcome, "turns_used": turns_used, "reason": reason}

    def _block(message: str) -> None:
        # A failing block_fn is logged, not raised: the caller still gets a
        # decision and the dispatcher remains the backstop.
        try:
            block_fn(message)
        except Exception as exc:
            _log(f"kanban goal loop: block_fn failed ({exc})")

    # A malformed budget must not abort the loop before it starts.
    try:
        max_turns = int(max_turns or DEFAULT_MAX_TURNS)
    except (TypeError, ValueError):
        max_turns = DEFAULT_MAX_TURNS
    if max_turns < 1:
        max_turns = DEFAULT_MAX_TURNS

    last_response = first_response or ""
    # The first turn already consumed one unit of budget.
    turns_used = 1
    nudged_to_finalize = False

    while True:
        # Did the worker terminate the task itself this turn?
        try:
            status = task_status_fn()
        except Exception as exc:
            _log(f"kanban goal loop: status check failed ({exc}); stopping")
            return _decision("stopped", "status check failed")

        if status == "done":
            _log(f"kanban goal loop: task {task_id} completed by worker after {turns_used} turn(s)")
            return _decision("completed_by_worker", "worker completed the task")
        if status == "blocked":
            _log(f"kanban goal loop: task {task_id} blocked by worker after {turns_used} turn(s)")
            return _decision("blocked_by_worker", "worker blocked the task")
        if status not in ("running", "ready"):
            # Reclaimed / archived / unexpected — let the dispatcher own it.
            _log(f"kanban goal loop: task {task_id} status={status!r}; stopping")
            return _decision("stopped", f"status={status}")

        # Still open — judge whether the latest response satisfies the card.
        # Guarded like every other external call in this loop so a judge
        # failure produces a decision instead of escaping as an exception.
        try:
            verdict, reason, _parse_failed = judge_goal(goal_text, last_response)
        except Exception as exc:
            _log(f"kanban goal loop: judge failed ({exc}); stopping")
            return _decision("stopped", f"judge error: {type(exc).__name__}")
        _log(f"kanban goal loop: turn {turns_used}/{max_turns} verdict={verdict} reason={_truncate(reason, 120)}")

        if verdict == "done":
            if nudged_to_finalize:
                # Already asked once to call kanban_complete and it still
                # didn't — block for review rather than spin.
                _log(f"kanban goal loop: task {task_id} judged done but worker won't finalize; blocking")
                _block(
                    f"Goal-mode worker's output looked complete but it never "
                    f"called kanban_complete after a finalize nudge ({reason})."
                )
                return _decision("blocked_budget", "judged done, never finalized")
            prompt = KANBAN_GOAL_FINALIZE_TEMPLATE.format(reason=_truncate(reason, 400))
            nudged_to_finalize = True
        else:
            prompt = KANBAN_GOAL_CONTINUATION_TEMPLATE.format(reason=_truncate(reason, 400))

        # Budget check BEFORE spending another turn.
        if turns_used >= max_turns:
            _log(f"kanban goal loop: task {task_id} exhausted {turns_used}/{max_turns} turns; blocking")
            _block(
                f"Goal-mode worker exhausted its turn budget "
                f"({turns_used}/{max_turns}) without completing the task. "
                f"Last judge verdict: {_truncate(reason, 300)}"
            )
            return _decision("blocked_budget", "turn budget exhausted")

        # Run another turn in the same session.
        try:
            last_response = run_turn(prompt) or ""
        except Exception as exc:
            _log(f"kanban goal loop: run_turn failed ({exc}); stopping")
            return _decision("stopped", f"run_turn error: {type(exc).__name__}")
        turns_used += 1
Omit to use the dispatcher's " "kanban.failure_limit config " f"(default {kb.DEFAULT_FAILURE_LIMIT}).") + p_create.add_argument("--goal", action="store_true", dest="goal_mode", + help="Run the worker in a goal loop: after each " + "turn a judge checks the response against the " + "card title/body and, if not done, the worker " + "keeps going in the same session until the " + "judge agrees it's complete (or the turn " + "budget runs out, which blocks the card for " + "review). Best for open-ended cards one shot " + "rarely finishes.") + p_create.add_argument("--goal-max-turns", type=int, default=None, + metavar="N", dest="goal_max_turns", + help="Turn budget for --goal workers (default 20). " + "Ignored without --goal.") p_create.add_argument("--initial-status", choices=sorted(kb.VALID_INITIAL_STATUSES), default="running", @@ -1343,6 +1356,8 @@ def _cmd_create(args: argparse.Namespace) -> int: max_runtime_seconds=max_runtime, skills=getattr(args, "skills", None) or None, max_retries=max_retries, + goal_mode=bool(getattr(args, "goal_mode", False)), + goal_max_turns=getattr(args, "goal_max_turns", None), initial_status=getattr(args, "initial_status", "running"), ) task = kb.get_task(conn, task_id) diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py index 471165524..3bb14573e 100644 --- a/hermes_cli/kanban_db.py +++ b/hermes_cli/kanban_db.py @@ -725,6 +725,19 @@ class Task: # ``kanban.failure_limit`` config, and then to ``DEFAULT_FAILURE_LIMIT``. # Name matches the ``--max-retries`` CLI flag on ``kanban create``. max_retries: Optional[int] = None + # When True, the dispatched worker runs in a Ralph-style goal loop + # (the same engine behind the ``/goal`` slash command): after each + # turn an auxiliary judge model evaluates the worker's response + # against this card's title/body (treated as the goal). 
If the judge + # says "not done" and budget remains, the worker is fed a + # continuation prompt IN THE SAME SESSION and keeps working until the + # judge agrees, the goal-turn budget is exhausted (→ kanban_block), + # or the worker explicitly blocks/completes. ``False`` (default) = + # the classic single-shot worker. ``goal_max_turns`` bounds the loop. + goal_mode: bool = False + # Goal-loop turn budget for ``goal_mode`` workers. ``None`` falls + # through to the goals engine default (``goals.DEFAULT_MAX_TURNS``). + goal_max_turns: Optional[int] = None # Originating chat/agent session id, when the task was created from # within an agent loop that propagated ``HERMES_SESSION_ID``. NULL for # tasks created from the CLI, the dashboard, or any path that doesn't @@ -797,6 +810,12 @@ class Task: max_retries=( row["max_retries"] if "max_retries" in keys else None ), + goal_mode=( + bool(row["goal_mode"]) if "goal_mode" in keys and row["goal_mode"] else False + ), + goal_max_turns=( + row["goal_max_turns"] if "goal_max_turns" in keys and row["goal_max_turns"] else None + ), session_id=( row["session_id"] if "session_id" in keys else None ), @@ -946,6 +965,16 @@ CREATE TABLE IF NOT EXISTS tasks ( -- case) falls through to the dispatcher-level ``kanban.failure_limit`` -- config and then ``DEFAULT_FAILURE_LIMIT``. max_retries INTEGER, + -- When 1, the dispatched worker runs in a Ralph-style goal loop: an + -- auxiliary judge re-evaluates the worker's response against the + -- card title/body after each turn and feeds a continuation prompt + -- back into the SAME session until the judge agrees the work is done + -- or ``goal_max_turns`` is exhausted. NULL/0 = classic single-shot + -- worker (the default). + goal_mode INTEGER NOT NULL DEFAULT 0, + -- Goal-loop turn budget for ``goal_mode`` workers. NULL = use the + -- goals-engine default. 
+ goal_max_turns INTEGER, -- Originating chat/agent session id when the task was created from -- inside an agent loop that propagated ``HERMES_SESSION_ID``. NULL -- for tasks created from the CLI, dashboard, or any path that doesn't @@ -1584,6 +1613,20 @@ def _migrate_add_optional_columns(conn: sqlite3.Connection) -> None: if "model_override" not in cols: conn.execute("ALTER TABLE tasks ADD COLUMN model_override TEXT") + if "goal_mode" not in cols: + # Ralph-style goal loop toggle for the dispatched worker. 0 (the + # default) = classic single-shot worker, preserving the behaviour + # existing rows had before the column existed. + _add_column_if_missing( + conn, "tasks", "goal_mode", "goal_mode INTEGER NOT NULL DEFAULT 0" + ) + + if "goal_max_turns" not in cols: + # Per-task goal-loop turn budget. NULL = goals-engine default. + _add_column_if_missing( + conn, "tasks", "goal_max_turns", "goal_max_turns INTEGER" + ) + if "session_id" not in cols: # Originating agent/chat session id, populated when the task is # created from within an agent loop that propagated @@ -1967,6 +2010,8 @@ def create_task( max_runtime_seconds: Optional[int] = None, skills: Optional[Iterable[str]] = None, max_retries: Optional[int] = None, + goal_mode: bool = False, + goal_max_turns: Optional[int] = None, initial_status: str = "running", session_id: Optional[str] = None, board: Optional[str] = None, @@ -2134,8 +2179,8 @@ def create_task( id, title, body, assignee, status, priority, created_by, created_at, workspace_kind, workspace_path, branch_name, tenant, idempotency_key, max_runtime_seconds, - skills, max_retries, session_id - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + skills, max_retries, goal_mode, goal_max_turns, session_id + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""", ( task_id, @@ -2154,6 +2199,8 @@ def create_task( int(max_runtime_seconds) if max_runtime_seconds is not None else None, json.dumps(skills_list) if skills_list is not None else None, int(max_retries) if max_retries is not None else None, + 1 if goal_mode else 0, + int(goal_max_turns) if goal_max_turns is not None else None, session_id, ), ) @@ -2173,6 +2220,7 @@ def create_task( "tenant": tenant, "branch_name": branch_name, "skills": list(skills_list) if skills_list else None, + "goal_mode": bool(goal_mode) or None, }, ) return task_id @@ -6412,6 +6460,13 @@ def _default_spawn( env["HERMES_KANBAN_RUN_ID"] = str(task.current_run_id) if task.claim_lock: env["HERMES_KANBAN_CLAIM_LOCK"] = task.claim_lock + # Goal-loop mode: the worker reads these and wraps its run in the + # Ralph-style /goal judge loop (see cli.py quiet-mode path). Only set + # when enabled so non-goal tasks keep a clean env. + if task.goal_mode: + env["HERMES_KANBAN_GOAL_MODE"] = "1" + if task.goal_max_turns is not None: + env["HERMES_KANBAN_GOAL_MAX_TURNS"] = str(int(task.goal_max_turns)) terminal_timeout = _worker_terminal_timeout_env( task.max_runtime_seconds, env.get("TERMINAL_TIMEOUT"), diff --git a/plugins/kanban/dashboard/dist/index.js b/plugins/kanban/dashboard/dist/index.js index c22c06c12..451f3a011 100644 --- a/plugins/kanban/dashboard/dist/index.js +++ b/plugins/kanban/dashboard/dist/index.js @@ -2600,6 +2600,13 @@ // input here to save vertical space in the common `scratch` case. const [workspaceKind, setWorkspaceKind] = useState("scratch"); const [workspacePath, setWorkspacePath] = useState(""); + // Goal-mode: when on, the dispatched worker runs the Ralph-style /goal + // loop — a judge re-checks the card after each turn and the worker keeps + // going in the same session until done, or the turn budget runs out + // (which blocks the card for review). goalMaxTurns is optional; blank + // = backend default. 
+ const [goalMode, setGoalMode] = useState(false); + const [goalMaxTurns, setGoalMaxTurns] = useState(""); const submit = function () { const trimmed = title.trim(); @@ -2626,9 +2633,17 @@ } const wpTrim = workspacePath.trim(); if (wpTrim) body.workspace_path = wpTrim; + // Goal-mode toggle. Only send the keys when enabled so the request + // shape stays small and old dispatchers ignore it cleanly. + if (goalMode) { + body.goal_mode = true; + const gmt = parseInt(goalMaxTurns, 10); + if (Number.isFinite(gmt) && gmt > 0) body.goal_max_turns = gmt; + } props.onSubmit(body); setTitle(""); setAssignee(""); setPriority(0); setParent(""); setSkills(""); setWorkspaceKind("scratch"); setWorkspacePath(""); + setGoalMode(false); setGoalMaxTurns(""); }; const showPathInput = workspaceKind !== "scratch"; @@ -2685,6 +2700,29 @@ title: "Force-load these skills into the worker (in addition to the built-in kanban-worker).", className: "h-7 text-xs", }), + h("div", { className: "flex gap-2 items-center" }, + h("label", { + className: "flex items-center gap-1.5 text-xs cursor-pointer select-none", + title: "Goal mode: the worker keeps going in the same session until a judge agrees the card is done (or the turn budget runs out, which blocks it for review). Best for open-ended cards one shot rarely finishes.", + }, + h("input", { + type: "checkbox", + checked: goalMode, + onChange: function (e) { setGoalMode(!!e.target.checked); }, + className: "h-3.5 w-3.5 accent-current", + }), + tx(t, "goalMode", "goal mode"), + ), + goalMode ? h(Input, { + type: "number", + value: goalMaxTurns, + onChange: function (e) { setGoalMaxTurns(e.target.value); }, + placeholder: tx(t, "goalMaxTurns", "max turns (default 20)"), + className: "h-7 text-xs w-40", + title: "Turn budget for the goal loop. 
Blank = backend default (20).", + min: 1, + }) : null, + ), h("div", { className: "flex gap-2" }, h(Select, Object.assign({ value: workspaceKind, @@ -3161,6 +3199,12 @@ label: tx(i18n, "skills", "Skills"), value: t.skills.join(", "), }) : null, + t.goal_mode ? h(MetaRow, { + label: tx(i18n, "goalMode", "Goal mode"), + value: t.goal_max_turns + ? `on (max ${t.goal_max_turns} turns)` + : "on", + }) : null, t.created_by ? h(MetaRow, { label: tx(i18n, "createdBy", "Created by"), value: t.created_by }) : null, ), h(StatusActions, { diff --git a/plugins/kanban/dashboard/plugin_api.py b/plugins/kanban/dashboard/plugin_api.py index 0c2122c2a..2d792622f 100644 --- a/plugins/kanban/dashboard/plugin_api.py +++ b/plugins/kanban/dashboard/plugin_api.py @@ -581,6 +581,8 @@ class CreateTaskBody(BaseModel): idempotency_key: Optional[str] = None max_runtime_seconds: Optional[int] = None skills: Optional[list[str]] = None + goal_mode: bool = False + goal_max_turns: Optional[int] = None @router.post("/tasks") @@ -603,6 +605,8 @@ def create_task(payload: CreateTaskBody, board: Optional[str] = Query(None)): idempotency_key=payload.idempotency_key, max_runtime_seconds=payload.max_runtime_seconds, skills=payload.skills, + goal_mode=payload.goal_mode, + goal_max_turns=payload.goal_max_turns, ) task = kanban_db.get_task(conn, task_id) body: dict[str, Any] = {"task": _task_dict(task) if task else None} diff --git a/skills/devops/kanban-orchestrator/SKILL.md b/skills/devops/kanban-orchestrator/SKILL.md index 25f634205..760f83071 100644 --- a/skills/devops/kanban-orchestrator/SKILL.md +++ b/skills/devops/kanban-orchestrator/SKILL.md @@ -178,6 +178,30 @@ Tell them what you created in plain prose, naming the actual profiles you used: **Tenant inheritance.** If `HERMES_TENANT` is set in your env, pass `tenant=os.environ.get("HERMES_TENANT")` on every `kanban_create` call so child tasks stay in the same namespace. 
+## Goal-mode cards (persistent workers) + +By default a dispatched worker gets **one shot** at its card: it does its work, calls `kanban_complete`/`kanban_block`, and exits. For open-ended cards where one turn rarely finishes the job, pass `goal_mode=True` to wrap that worker in a Ralph-style goal loop — the same engine behind the `/goal` slash command: + +```python +kanban_create( + title="Translate the full docs site to French", + body="Acceptance: every page translated, no English left, links intact.", + assignee="", + goal_mode=True, # judge re-checks the card after each turn + goal_max_turns=15, # optional budget (default 20) +)["task_id"] +``` + +How it behaves: +- After each worker turn, an auxiliary judge evaluates the worker's response against the card's **title + body** (treated as the acceptance criteria). +- Not done + budget remains → the worker keeps going **in the same session** (full context retained — not a fresh respawn). +- Worker calls `kanban_complete`/`kanban_block` itself → loop stops, normal lifecycle. +- Budget exhausted without completion → the card is **blocked** for human review (sticky), never a silent exit. + +When to use it: long, multi-step, or "keep going until X is true" cards. When NOT to: cheap one-shot cards (translation of a single string, a quick lookup) — the judge overhead isn't worth it, and the dispatcher's existing retry/circuit-breaker already handles transient worker failures. + +Write the body as **explicit acceptance criteria** — the judge is only as good as the goal text. "Translate the README" is weaker than "Translate every section of the README to French; no English sentences remain." + ## Recovering stuck workers When a worker profile keeps crashing, hallucinating, or getting blocked by its own mistakes (usually: wrong model, missing skill, broken credential), the kanban dashboard flags the task with a ⚠ badge and opens a **Recovery** section in the drawer. 
"""Tests for kanban goal_mode — per-card Ralph-style goal loop.

Covers three layers:

1. DB: goal_mode / goal_max_turns persist through create_task + from_row,
   and a legacy DB (without the columns) migrates cleanly.
2. Spawn: _default_spawn sets the HERMES_KANBAN_GOAL_MODE env vars only
   when the card opts in.
3. Loop: goals.run_kanban_goal_loop continuation / completion / budget
   behaviour, driven entirely through injected callbacks (no live model).
"""

from __future__ import annotations

import sqlite3
from pathlib import Path

import pytest

from hermes_cli import kanban_db as kb
from hermes_cli import goals


@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Point HERMES_HOME and ``Path.home`` at a temp dir and initialise the DB."""
    home = tmp_path / ".hermes"
    home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    kb.init_db()
    return home


# ---------------------------------------------------------------------------
# DB layer
# ---------------------------------------------------------------------------

def test_goal_mode_defaults_off(kanban_home):
    """A task created without goal arguments reads back with goal mode off."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="plain task", assignee="worker")
        task = kb.get_task(conn, tid)
        assert task.goal_mode is False
        assert task.goal_max_turns is None


def test_goal_mode_persists(kanban_home):
    """Both goal_mode and goal_max_turns round-trip through the DB."""
    with kb.connect() as conn:
        tid = kb.create_task(
            conn,
            title="open-ended task",
            assignee="worker",
            goal_mode=True,
            goal_max_turns=7,
        )
        task = kb.get_task(conn, tid)
        assert task.goal_mode is True
        assert task.goal_max_turns == 7


def test_goal_mode_without_max_turns(kanban_home):
    """goal_mode can be enabled on its own; the budget then reads back as None."""
    with kb.connect() as conn:
        tid = kb.create_task(
            conn, title="t", assignee="worker", goal_mode=True
        )
        task = kb.get_task(conn, tid)
        assert task.goal_mode is True
        assert task.goal_max_turns is None


def test_legacy_db_migrates_goal_columns(tmp_path, monkeypatch):
    """A tasks table created without goal columns must gain them on init."""
    home = tmp_path / ".hermes"
    home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    db_path = kb.kanban_db_path()
    db_path.parent.mkdir(parents=True, exist_ok=True)
    # Minimal legacy schema: tasks table missing goal_mode / goal_max_turns.
    legacy = sqlite3.connect(db_path)
    legacy.execute(
        """
        CREATE TABLE tasks (
            id TEXT PRIMARY KEY,
            title TEXT NOT NULL,
            body TEXT,
            assignee TEXT,
            status TEXT NOT NULL DEFAULT 'ready',
            priority INTEGER NOT NULL DEFAULT 0,
            created_by TEXT,
            created_at INTEGER NOT NULL,
            started_at INTEGER,
            completed_at INTEGER,
            workspace_kind TEXT NOT NULL DEFAULT 'scratch',
            workspace_path TEXT,
            claim_lock TEXT,
            claim_expires INTEGER
        )
        """
    )
    legacy.execute(
        "INSERT INTO tasks (id, title, status, priority, created_at, workspace_kind) "
        "VALUES ('legacy1', 'old', 'ready', 0, 1, 'scratch')"
    )
    legacy.commit()
    legacy.close()

    # init_db runs the additive migration.
    kb.init_db()
    with kb.connect() as conn:
        cols = {r["name"] for r in conn.execute("PRAGMA table_info(tasks)")}
        assert "goal_mode" in cols
        assert "goal_max_turns" in cols
        task = kb.get_task(conn, "legacy1")
        # Existing row keeps the safe default.
        assert task.goal_mode is False
        assert task.goal_max_turns is None


# ---------------------------------------------------------------------------
# Spawn env
# ---------------------------------------------------------------------------

def test_spawn_sets_goal_env_only_when_enabled(kanban_home, monkeypatch):
    """A goal_mode card spawns its worker with both goal env vars set."""
    captured = {}

    class _FakeProc:
        pid = 4242

    def _fake_popen(cmd, **kwargs):
        # Record the environment the dispatcher would hand to the worker.
        captured["env"] = kwargs.get("env", {})
        return _FakeProc()

    monkeypatch.setattr("subprocess.Popen", _fake_popen)
    # Avoid the kanban-worker skill probe touching the real skills dir.
    monkeypatch.setattr(kb, "_kanban_worker_skill_available", lambda home: False)

    with kb.connect() as conn:
        tid = kb.create_task(
            conn,
            title="goal task",
            assignee="default",
            goal_mode=True,
            goal_max_turns=5,
        )
        task = kb.get_task(conn, tid)

    kb._default_spawn(task, str(kanban_home))
    env = captured["env"]
    assert env.get("HERMES_KANBAN_GOAL_MODE") == "1"
    assert env.get("HERMES_KANBAN_GOAL_MAX_TURNS") == "5"


def test_spawn_no_goal_env_for_plain_task(kanban_home, monkeypatch):
    """A plain card's worker environment carries neither goal env var."""
    captured = {}

    class _FakeProc:
        pid = 4243

    def _fake_popen(cmd, **kwargs):
        # Record the environment the dispatcher would hand to the worker.
        captured["env"] = kwargs.get("env", {})
        return _FakeProc()

    monkeypatch.setattr("subprocess.Popen", _fake_popen)
    monkeypatch.setattr(kb, "_kanban_worker_skill_available", lambda home: False)

    with kb.connect() as conn:
        tid = kb.create_task(conn, title="plain", assignee="default")
        task = kb.get_task(conn, tid)

    kb._default_spawn(task, str(kanban_home))
    env = captured["env"]
    assert "HERMES_KANBAN_GOAL_MODE" not in env
    assert "HERMES_KANBAN_GOAL_MAX_TURNS" not in env


# ---------------------------------------------------------------------------
# Goal loop logic (callback-injected, no live model)
# ---------------------------------------------------------------------------

def _patch_judge(monkeypatch, verdicts):
    """Make judge_goal return a scripted sequence of verdicts."""
    seq = list(verdicts)

    def _fake_judge(goal, response, subgoals=None):
        # Once the script runs out, every further call answers "done".
        v = seq.pop(0) if seq else "done"
        return v, f"scripted:{v}", False

    monkeypatch.setattr(goals, "judge_goal", _fake_judge)


def test_loop_stops_when_worker_already_completed(monkeypatch):
    """Status ``done`` on the first check ends the loop with no extra turn."""
    # Worker called kanban_complete on its first turn — no judging needed.
    _patch_judge(monkeypatch, ["continue"])  # should never be consulted
    turns = []

    res = goals.run_kanban_goal_loop(
        task_id="t1",
        goal_text="do the thing",
        run_turn=lambda p: turns.append(p) or "x",
        task_status_fn=lambda: "done",
        block_fn=lambda r: pytest.fail("should not block"),
        first_response="done already",
    )
    assert res["outcome"] == "completed_by_worker"
    assert turns == []  # no extra turns


def test_loop_continues_then_worker_completes(monkeypatch):
    """Two ``continue`` verdicts feed two continuation prompts before completion."""
    _patch_judge(monkeypatch, ["continue", "continue"])
    statuses = iter(["running", "running", "done"])
    turns = []

    res = goals.run_kanban_goal_loop(
        task_id="t2",
        goal_text="ship feature",
        run_turn=lambda p: turns.append(p) or f"turn{len(turns)}",
        task_status_fn=lambda: next(statuses),
        block_fn=lambda r: pytest.fail("should not block"),
        max_turns=10,
        first_response="started",
    )
    assert res["outcome"] == "completed_by_worker"
    # Two continuation turns fed before the worker completed.
    assert len(turns) == 2
    assert all("not done yet" in p for p in turns)


def test_loop_blocks_on_budget_exhaustion(monkeypatch):
    """A worker that never finishes is blocked once max_turns is used up."""
    _patch_judge(monkeypatch, ["continue"] * 10)
    blocked = {}

    def _block(reason):
        blocked["reason"] = reason

    res = goals.run_kanban_goal_loop(
        task_id="t3",
        goal_text="endless task",
        run_turn=lambda p: "still going",
        task_status_fn=lambda: "running",
        block_fn=_block,
        max_turns=3,
        first_response="turn1",
    )
    assert res["outcome"] == "blocked_budget"
    assert res["turns_used"] == 3
    assert "turn budget" in blocked["reason"].lower()


def test_loop_finalize_nudge_when_judge_done_but_open(monkeypatch):
    """A ``done`` verdict on an open task triggers exactly one finalize prompt."""
    # Judge says done, but worker never terminated → one finalize nudge,
    # then worker completes.
    _patch_judge(monkeypatch, ["done", "done"])
    statuses = iter(["running", "done"])
    turns = []

    res = goals.run_kanban_goal_loop(
        task_id="t4",
        goal_text="task",
        run_turn=lambda p: turns.append(p) or "ok",
        task_status_fn=lambda: next(statuses),
        block_fn=lambda r: pytest.fail("should not block"),
        max_turns=10,
        first_response="looks done",
    )
    assert res["outcome"] == "completed_by_worker"
    assert len(turns) == 1
    assert "still open" in turns[0]


def test_loop_blocks_when_judge_done_but_never_finalizes(monkeypatch):
    """A second ``done`` verdict after the nudge blocks the task."""
    # Judge keeps saying done, worker never calls kanban_complete → block
    # after the single finalize nudge.
    _patch_judge(monkeypatch, ["done", "done"])
    blocked = {}

    res = goals.run_kanban_goal_loop(
        task_id="t5",
        goal_text="task",
        run_turn=lambda p: "still not finalizing",
        task_status_fn=lambda: "running",
        block_fn=lambda r: blocked.update(reason=r),
        max_turns=10,
        first_response="looks done",
    )
    assert res["outcome"] == "blocked_budget"
    assert "finalize" in blocked["reason"].lower()


def test_loop_stops_if_task_reclaimed(monkeypatch):
    """A status outside running/ready stops the loop without a turn or a block."""
    _patch_judge(monkeypatch, ["continue"])
    res = goals.run_kanban_goal_loop(
        task_id="t6",
        goal_text="task",
        run_turn=lambda p: pytest.fail("should not run a turn"),
        task_status_fn=lambda: "archived",
        block_fn=lambda r: pytest.fail("should not block"),
        first_response="x",
    )
    assert res["outcome"] == "stopped"
When true, " + "after each turn an auxiliary judge checks the worker's " + "response against this card's title/body; if the work " + "isn't done and budget remains, the worker keeps going " + "in the same session until the judge agrees it's " + "complete (or the goal-turn budget is exhausted, which " + "blocks the task for human review). Use this for " + "open-ended cards where one shot rarely finishes the " + "work. Defaults to false (classic single-shot worker)." + ), + }, + "goal_max_turns": { + "type": "integer", + "description": ( + "Turn budget for goal_mode workers. Caps how many " + "continuation turns the worker may take before the task " + "is blocked for review. Ignored unless goal_mode is " + "true. Defaults to the goal-engine default (20)." + ), + }, "board": _board_schema_prop(), }, "required": ["title", "assignee"], diff --git a/website/docs/user-guide/features/kanban.md b/website/docs/user-guide/features/kanban.md index 0192f9c64..4c8ae55e8 100644 --- a/website/docs/user-guide/features/kanban.md +++ b/website/docs/user-guide/features/kanban.md @@ -428,6 +428,20 @@ hermes kanban create "audit auth flow" \ These skills are **additive** to the built-in `kanban-worker` — the dispatcher emits one `--skills <name>` flag for each (and for the built-in), so the worker spawns with all of them loaded. The skill names must match skills that are actually installed on the assignee's profile (run `hermes skills list` to see what's available); there's no runtime install. +### Goal-mode cards (`--goal`) + +By default each worker gets **one shot** at its card — do the work, call `kanban_complete`/`kanban_block`, exit.
Pass `--goal` (CLI) or `goal_mode=True` (the `kanban_create` tool / dashboard) to instead run that worker in a **goal loop**, the same Ralph-style engine behind the `/goal` slash command: after every turn an auxiliary judge checks the worker's output against the card's title + body (treated as the acceptance criteria), and if the work isn't done — and the turn budget remains — the worker keeps going **in the same session** until the judge agrees, the worker terminates the task itself, or the budget runs out (which **blocks** the card for human review rather than exiting silently). + +```bash +hermes kanban create "Translate the docs site to French" \ + --body "Acceptance: every page translated, no English left, links intact." \ + --assignee linguist \ + --goal \ + --goal-max-turns 15 # optional; default 20 +``` + +Use it for open-ended, multi-step, or "keep going until X is true" cards. Skip it for cheap one-shot work — the per-turn judge overhead isn't worth it, and the dispatcher's existing retry/circuit-breaker already handles transient worker failures. The judge is only as good as your goal text, so write the body as **explicit acceptance criteria**. + ### The orchestrator skill A **well-behaved orchestrator does not do the work itself.** It decomposes the user's goal into tasks, links them, assigns each to one of the profiles you've set up, and steps back. The `kanban-orchestrator` skill encodes this as tool-call patterns: anti-temptation rules, a Step-0 profile-discovery prompt (the dispatcher silently fails on unknown assignee names, so the orchestrator must ground every card in profiles that actually exist on your machine), and a decomposition playbook keyed on `kanban_create` / `kanban_link` / `kanban_comment`. @@ -632,6 +646,7 @@ hermes kanban create "<title>" [--body ...] [--assignee <profile>] [--priority N] [--triage] [--idempotency-key KEY] [--max-runtime 30m|2h|1d|<seconds>] [--max-retries N] + [--goal] [--goal-max-turns N] [--skill <name>]...
[--json] hermes kanban list [--mine] [--assignee P] [--status S] [--tenant T] [--archived]