diff --git a/hermes_cli/backup.py b/hermes_cli/backup.py index 206808267..0c6bf8692 100644 --- a/hermes_cli/backup.py +++ b/hermes_cli/backup.py @@ -670,6 +670,105 @@ def restore_quick_snapshot( return restored > 0 +# Relative path of the cron job database inside HERMES_HOME. Kept in sync with +# the entry in ``_QUICK_STATE_FILES`` and with ``cron/jobs.py``'s ``JOBS_FILE``. +_CRON_JOBS_REL = "cron/jobs.json" + + +def _count_cron_jobs(path: Path) -> Optional[int]: + """Return the number of cron jobs stored in ``path``. + + The canonical on-disk shape is ``{"jobs": [...]}`` (see ``cron/jobs.py``). + A legacy bare-list shape (``[...]``) is also honoured. + + Returns: + The job count for any *valid, readable* JSON document, or ``None`` if + the file is missing or cannot be parsed. ``None`` means "unknown" — + callers must not treat it as "zero jobs", because acting on an + unreadable file could mask a real corruption the user needs to see. + """ + if not path.is_file(): + return None + try: + with open(path, "r", encoding="utf-8") as f: + data = json.load(f) + except (OSError, json.JSONDecodeError): + return None + if isinstance(data, dict): + jobs = data.get("jobs", []) + return len(jobs) if isinstance(jobs, list) else None + if isinstance(data, list): + return len(data) + return None + + +def restore_cron_jobs_if_emptied( + snapshot_id: str, + hermes_home: Optional[Path] = None, +) -> Optional[Dict[str, Any]]: + """Safety net for silent cron-job loss across ``hermes update``. + + Config-version migrations have been observed to leave ``cron/jobs.json`` + valid-but-empty after an update, silently dropping every scheduled job + (issue #34600). The existing malformed-shape guards in ``cron/jobs.py`` + don't catch this case because ``{"jobs": []}`` is perfectly valid JSON. + + This compares the *current* job count against the pre-update snapshot. If + the live file now has **zero** jobs while the snapshot captured **one or + more**, the snapshot copy of ``cron/jobs.json`` is restored in place. + + The check is deliberately conservative — it only ever restores when there + is unambiguous evidence of loss (snapshot had jobs, live file has none), + so a user who genuinely deleted all their jobs during/after the update is + never second-guessed, and an unreadable live file (count ``None``) is left + untouched so real corruption still surfaces. + + Args: + snapshot_id: The pre-update quick-snapshot id (from + :func:`create_quick_snapshot`). + hermes_home: Override for the Hermes home directory (tests). + + Returns: + ``None`` when no action was taken (the common, healthy path). On a + successful restore, a dict ``{"restored": True, "job_count": N, + "snapshot_id": ...}`` so the caller can warn the user. + """ + if not snapshot_id: + return None + + home = hermes_home or get_hermes_home() + live_path = home / _CRON_JOBS_REL + + live_count = _count_cron_jobs(live_path) + # Only act when the live file is readable AND empty. ``None`` (missing or + # unparseable) is intentionally left alone — that's a different failure + # mode the user should see rather than have papered over. + if live_count is None or live_count > 0: + return None + + snap_path = _quick_snapshot_root(home) / snapshot_id / _CRON_JOBS_REL + snap_count = _count_cron_jobs(snap_path) + if not snap_count: # None or 0 — nothing worth restoring + return None + + try: + live_path.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(snap_path, live_path) + except (OSError, PermissionError) as exc: + logger.error( + "Cron jobs were emptied during update but auto-restore failed: %s", exc + ) + return None + + logger.warning( + "Restored %d cron job(s) from pre-update snapshot %s " + "(cron/jobs.json was emptied during migration)", + snap_count, + snapshot_id, + ) + return {"restored": True, "job_count": snap_count, "snapshot_id": snapshot_id} + + def _prune_quick_snapshots(root: Path, keep: int = _QUICK_DEFAULT_KEEP) -> int: """Remove oldest quick snapshots beyond the keep limit. Returns count deleted.""" if not root.exists(): diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 8540048aa..76bd12a53 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -9125,12 +9125,13 @@ def _cmd_update_impl(args, gateway_mode: bool): # though `git pull` can't touch $HERMES_HOME, this is cheap # belt-and-suspenders insurance and gives the user something to # restore from via `/snapshot list` / `/snapshot restore `. + pre_update_snapshot_id = None try: from hermes_cli.backup import create_quick_snapshot - snap_id = create_quick_snapshot(label="pre-update", keep=1) - if snap_id: - print(f" ✓ Pre-update snapshot: {snap_id}") + pre_update_snapshot_id = create_quick_snapshot(label="pre-update", keep=1) + if pre_update_snapshot_id: + print(f" ✓ Pre-update snapshot: {pre_update_snapshot_id}") except Exception as exc: # Never let a snapshot failure block an update. logger.debug("Pre-update snapshot failed: %s", exc) @@ -9467,6 +9468,25 @@ def _cmd_update_impl(args, gateway_mode: bool): else: print(" ✓ Configuration is up to date") + # Safety net: config-version migrations have been observed to leave + # cron/jobs.json valid-but-empty, silently dropping every scheduled + # job (issue #34600). If the live file is now empty while the + # pre-update snapshot held jobs, restore it and warn loudly. + try: + from hermes_cli.backup import restore_cron_jobs_if_emptied + + cron_restore = restore_cron_jobs_if_emptied(pre_update_snapshot_id) + if cron_restore: + print() + print( + " ⚠️ cron/jobs.json was emptied during this update — " + f"restored {cron_restore['job_count']} job(s) from " + f"pre-update snapshot {cron_restore['snapshot_id']}." + ) + except Exception as exc: + # Never let the cron safety net break an otherwise-good update. + logger.debug("Cron jobs auto-restore check failed: %s", exc) + print() print("✓ Update complete!") diff --git a/tests/hermes_cli/test_backup.py b/tests/hermes_cli/test_backup.py index a8657d770..8c0f2a398 100644 --- a/tests/hermes_cli/test_backup.py +++ b/tests/hermes_cli/test_backup.py @@ -1679,3 +1679,105 @@ class TestPreMigrationBackup: _t.sleep(1.05) # Update backup must still be there assert update_backup.exists(), "pre-migration rotation wrongly pruned the pre-update backup" + + +# --------------------------------------------------------------------------- +# Cron jobs auto-restore after silent migration loss (issue #34600) +# --------------------------------------------------------------------------- + +class TestRestoreCronJobsIfEmptied: + """`hermes update` config migration can leave cron/jobs.json valid-but-empty, + silently dropping every scheduled job. `restore_cron_jobs_if_emptied` is the + post-migration safety net that restores from the pre-update snapshot.""" + + @staticmethod + def _seed_jobs(path: Path, jobs): + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps({"jobs": jobs})) + + def _make_snapshot(self, hermes_home: Path, label="pre-update"): + from hermes_cli.backup import create_quick_snapshot + return create_quick_snapshot(label=label, hermes_home=hermes_home, keep=5) + + def test_restores_when_emptied_after_migration(self, tmp_path): + from hermes_cli.backup import restore_cron_jobs_if_emptied + hermes_home = tmp_path / ".hermes" + jobs_path = hermes_home / "cron" / "jobs.json" + # Pre-update: 3 real jobs. + self._seed_jobs(jobs_path, [{"id": "a"}, {"id": "b"}, {"id": "c"}]) + snap_id = self._make_snapshot(hermes_home) + assert snap_id + + # Migration silently empties the file (valid JSON, zero jobs). + jobs_path.write_text(json.dumps({"jobs": []})) + + result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home) + assert result is not None + assert result["restored"] is True + assert result["job_count"] == 3 + assert result["snapshot_id"] == snap_id + + # The live file now has the jobs back. + restored = json.loads(jobs_path.read_text()) + assert len(restored["jobs"]) == 3 + + def test_noop_when_live_file_still_has_jobs(self, tmp_path): + from hermes_cli.backup import restore_cron_jobs_if_emptied + hermes_home = tmp_path / ".hermes" + jobs_path = hermes_home / "cron" / "jobs.json" + self._seed_jobs(jobs_path, [{"id": "a"}, {"id": "b"}]) + snap_id = self._make_snapshot(hermes_home) + + # Healthy path: file unchanged after update. + result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home) + assert result is None + + def test_noop_when_snapshot_had_no_jobs(self, tmp_path): + from hermes_cli.backup import restore_cron_jobs_if_emptied + hermes_home = tmp_path / ".hermes" + jobs_path = hermes_home / "cron" / "jobs.json" + # Pre-update genuinely had zero jobs; current is also empty. + self._seed_jobs(jobs_path, []) + snap_id = self._make_snapshot(hermes_home) + jobs_path.write_text(json.dumps({"jobs": []})) + + result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home) + assert result is None + + def test_noop_when_live_file_unreadable(self, tmp_path): + """An unparseable live file is left alone — that's a different failure + mode the user should see, not silently overwrite.""" + from hermes_cli.backup import restore_cron_jobs_if_emptied + hermes_home = tmp_path / ".hermes" + jobs_path = hermes_home / "cron" / "jobs.json" + self._seed_jobs(jobs_path, [{"id": "a"}]) + snap_id = self._make_snapshot(hermes_home) + jobs_path.write_text("{ this is not valid json") + + result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home) + assert result is None + # File left untouched. + assert jobs_path.read_text() == "{ this is not valid json" + + def test_noop_when_snapshot_id_missing(self, tmp_path): + from hermes_cli.backup import restore_cron_jobs_if_emptied + hermes_home = tmp_path / ".hermes" + jobs_path = hermes_home / "cron" / "jobs.json" + self._seed_jobs(jobs_path, []) + assert restore_cron_jobs_if_emptied(None, hermes_home=hermes_home) is None + assert restore_cron_jobs_if_emptied("", hermes_home=hermes_home) is None + + def test_restores_legacy_bare_list_snapshot_shape(self, tmp_path): + """A legacy snapshot storing a bare JSON list (not {"jobs": [...]}) is + still counted and restored.""" + from hermes_cli.backup import restore_cron_jobs_if_emptied + hermes_home = tmp_path / ".hermes" + jobs_path = hermes_home / "cron" / "jobs.json" + jobs_path.parent.mkdir(parents=True, exist_ok=True) + jobs_path.write_text(json.dumps([{"id": "a"}, {"id": "b"}])) + snap_id = self._make_snapshot(hermes_home) + + jobs_path.write_text(json.dumps({"jobs": []})) + result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home) + assert result is not None + assert result["job_count"] == 2