From 30412a9771cc81861c58a94fdb64fc49036ef307 Mon Sep 17 00:00:00 2001 From: kyssta-exe Date: Wed, 3 Jun 2026 04:10:57 +0000 Subject: [PATCH] fix(cron): re-validate stale cron-output entries before deletion (#37721) quick() and dry_run() previously trusted the stored category from tracked.json without re-validating at delete time. Stale entries from before #34840 could carry category="cron-output" for cron control-plane paths (e.g. cron/jobs.json), causing quick() to delete the live scheduler registry. Fix: - Fix guess_category() to only classify cron/output/** as cron-output (was classifying ALL cron/* paths, missing the #34840 fix). - Re-validate cron-output entries via guess_category() at delete time in quick() and dry_run(); stale entries that are no longer classified as cron-output are skipped and removed from tracked.json. - Add _is_protected_cron_path() as a hard defense-in-depth guard that blocks deletion of cron/cronjobs directories and known control-plane files (jobs.json, .tick.lock) regardless of stored category. - Update test_cron_subtree_categorised to match fixed guess_category (only cron/output/* is cron-output, not all of cron/). Tests: add 5 regression tests in TestStaleCronEntryMigration. --- plugins/disk-cleanup/disk_cleanup.py | 57 ++++++++++ tests/plugins/test_disk_cleanup_plugin.py | 129 ++++++++++++++++++++++ 2 files changed, 186 insertions(+) diff --git a/plugins/disk-cleanup/disk_cleanup.py b/plugins/disk-cleanup/disk_cleanup.py index 8d984273e..fddb62dac 100755 --- a/plugins/disk-cleanup/disk_cleanup.py +++ b/plugins/disk-cleanup/disk_cleanup.py @@ -145,6 +145,33 @@ ALLOWED_CATEGORIES = { } +# Paths under $HERMES_HOME that must NEVER be deleted by quick(), +# regardless of what the stored category says. This is a defense-in-depth +# guard against stale tracked.json entries from before #34840. +_PROTECTED_CRON_PATHS: set[str] = set() + + +def _is_protected_cron_path(p: Path) -> bool: + """Return True if *p* is a cron control-plane file/directory that must + never be deleted. + + This only matches the directory itself and known control-plane files + (``jobs.json``, ``.tick.lock``) — it does NOT blanket-protect + everything under ``cron/`` because ``cron/output/`` is disposable. + """ + # Lazily build the set once per process so HERMES_HOME is resolved + # exactly once. + if not _PROTECTED_CRON_PATHS: + hermes_home = get_hermes_home() + for parent in ("cron", "cronjobs"): + base = hermes_home / parent + _PROTECTED_CRON_PATHS.add(str(base)) + _PROTECTED_CRON_PATHS.add(str(base / "jobs.json")) + _PROTECTED_CRON_PATHS.add(str(base / ".tick.lock")) + resolved = str(p.resolve()) + return resolved in _PROTECTED_CRON_PATHS + + def fmt_size(n: float) -> str: for unit in ("B", "KB", "MB", "GB", "TB"): if n < 1024: @@ -226,6 +253,14 @@ def dry_run() -> Tuple[List[Dict], List[Dict]]: cat = item["category"] size = item["size"] + # Re-validate stale "cron-output" entries (fixes #37721). + if cat == "cron-output": + re_cat = guess_category(p) + if re_cat != "cron-output": + # Stale entry — would be skipped by quick(); omit from + # dry-run output too. + continue + if cat == "test": auto.append(item) elif cat == "temp" and age > 7: @@ -269,6 +304,28 @@ def quick() -> Dict[str, Any]: age = (now - datetime.fromisoformat(item["timestamp"])).days + # ---- stale-state migration (fixes #37721) ---- + # Old tracked.json entries may carry a "cron-output" category for + # paths that are NOT under cron/output/ (e.g. cron/jobs.json). + # guess_category() was fixed in #34840, but existing entries are + # never re-validated. Re-classify here so stale entries for cron + # control-plane state are not deleted. + if cat == "cron-output": + re_cat = guess_category(p) + if re_cat != "cron-output": + _log( + f"SKIP stale cron-output entry: {p} " + f"(re-classified as {re_cat!r})" + ) + # Drop the stale entry — it was misclassified. + continue + + # Hard safety net: never delete cron control-plane state even if + # the category somehow slipped through re-validation above. + if _is_protected_cron_path(p): + _log(f"SKIP protected cron path: {p}") + continue + should_delete = ( cat == "test" or (cat == "temp" and age > 7) diff --git a/tests/plugins/test_disk_cleanup_plugin.py b/tests/plugins/test_disk_cleanup_plugin.py index 4f7f66e02..783644d38 100644 --- a/tests/plugins/test_disk_cleanup_plugin.py +++ b/tests/plugins/test_disk_cleanup_plugin.py @@ -170,6 +170,135 @@ class TestGuessCategory: assert dg.guess_category(p) is None +class TestStaleCronEntryMigration: + """Regression tests for #37721 — stale cron-output entries in tracked.json.""" + + def test_quick_skips_stale_cron_output_for_jobs_json(self, _isolate_env): + """A stale tracked.json entry with category="cron-output" for + cron/jobs.json must NOT be deleted by quick(). + + This is the exact scenario from #37721: an old tracked.json has + {"path": ".../cron/jobs.json", "category": "cron-output"} which + would pass the delete filter but must be skipped because + guess_category() now returns None for non-output cron paths. + """ + dg = _load_lib() + cron_dir = _isolate_env / "cron" + cron_dir.mkdir() + jobs_json = cron_dir / "jobs.json" + jobs_json.write_text('{"jobs": []}') + + # Simulate a stale tracked.json entry from before #34840 by + # directly writing the tracked file (track() would reject it). + tracked_file = _isolate_env / "disk-cleanup" / "tracked.json" + tracked_file.parent.mkdir(parents=True, exist_ok=True) + tracked_file.write_text(json.dumps([{ + "path": str(jobs_json), + "category": "cron-output", + "timestamp": "2025-01-01T00:00:00+00:00", # very old + "size": 123, + }])) + + summary = dg.quick() + assert summary["deleted"] == 0, "cron/jobs.json must not be deleted" + assert jobs_json.exists(), "jobs.json must still exist" + # The stale entry should have been dropped from tracking. + remaining = json.loads(tracked_file.read_text()) + assert len(remaining) == 0 + + def test_quick_skips_stale_cron_output_for_cron_dir(self, _isolate_env): + """Stale entry for the cron/ directory itself must not be deleted.""" + dg = _load_lib() + cron_dir = _isolate_env / "cron" + cron_dir.mkdir() + output_dir = cron_dir / "output" + output_dir.mkdir() + (output_dir / "run.md").write_text("x") + + tracked_file = _isolate_env / "disk-cleanup" / "tracked.json" + tracked_file.parent.mkdir(parents=True, exist_ok=True) + tracked_file.write_text(json.dumps([{ + "path": str(cron_dir), + "category": "cron-output", + "timestamp": "2025-01-01T00:00:00+00:00", + "size": 0, + }])) + + summary = dg.quick() + assert summary["deleted"] == 0, "cron/ dir must not be deleted" + assert cron_dir.exists() + + def test_quick_skips_protected_cron_paths_defense_in_depth(self, _isolate_env): + """Defense-in-depth: even if guess_category returned cron-output + (hypothetically), protected cron paths are never deleted.""" + dg = _load_lib() + cron_dir = _isolate_env / "cron" + cron_dir.mkdir() + tick_lock = cron_dir / ".tick.lock" + tick_lock.write_text("") + + # Manually inject a stale entry with "test" category (would normally + # be auto-deleted) — the protected path guard must still block it. + tracked_file = _isolate_env / "disk-cleanup" / "tracked.json" + tracked_file.parent.mkdir(parents=True, exist_ok=True) + tracked_file.write_text(json.dumps([{ + "path": str(tick_lock), + "category": "test", + "timestamp": "2025-01-01T00:00:00+00:00", + "size": 0, + }])) + + summary = dg.quick() + assert summary["deleted"] == 0, ".tick.lock must not be deleted" + assert tick_lock.exists() + + def test_dry_run_omits_stale_cron_output(self, _isolate_env): + """dry_run() should also skip stale cron-output entries.""" + dg = _load_lib() + cron_dir = _isolate_env / "cron" + cron_dir.mkdir() + jobs_json = cron_dir / "jobs.json" + jobs_json.write_text("[]") + + tracked_file = _isolate_env / "disk-cleanup" / "tracked.json" + tracked_file.parent.mkdir(parents=True, exist_ok=True) + tracked_file.write_text(json.dumps([{ + "path": str(jobs_json), + "category": "cron-output", + "timestamp": "2025-01-01T00:00:00+00:00", + "size": 123, + }])) + + auto, prompt = dg.dry_run() + assert len(auto) == 0, "stale cron-output for jobs.json must not appear" + assert len(prompt) == 0 + + def test_legitimate_cron_output_still_deleted(self, _isolate_env): + """A valid cron-output entry under cron/output/ must still be deleted.""" + dg = _load_lib() + output_dir = _isolate_env / "cron" / "output" / "job_1" + output_dir.mkdir(parents=True) + run_md = output_dir / "run.md" + run_md.write_text("x") + + # Old enough to be deleted (>14 days) + from datetime import datetime, timezone, timedelta + old_ts = (datetime.now(timezone.utc) - timedelta(days=20)).isoformat() + + tracked_file = _isolate_env / "disk-cleanup" / "tracked.json" + tracked_file.parent.mkdir(parents=True, exist_ok=True) + tracked_file.write_text(json.dumps([{ + "path": str(run_md), + "category": "cron-output", + "timestamp": old_ts, + "size": 10, + }])) + + summary = dg.quick() + assert summary["deleted"] == 1, "valid old cron-output should be deleted" + assert not run_md.exists() + + class TestTrackForgetQuick: def test_track_then_quick_deletes_test(self, _isolate_env): dg = _load_lib()