diff --git a/plugins/disk-cleanup/disk_cleanup.py b/plugins/disk-cleanup/disk_cleanup.py index 8d984273e..fddb62dac 100755 --- a/plugins/disk-cleanup/disk_cleanup.py +++ b/plugins/disk-cleanup/disk_cleanup.py @@ -145,6 +145,33 @@ ALLOWED_CATEGORIES = { } +# Paths under $HERMES_HOME that must NEVER be deleted by quick(), +# regardless of what the stored category says. This is a defense-in-depth +# guard against stale tracked.json entries from before #34840. +_PROTECTED_CRON_PATHS: set[str] = set() + + +def _is_protected_cron_path(p: Path) -> bool: + """Return True if *p* is a cron control-plane file/directory that must + never be deleted. + + This only matches the directory itself and known control-plane files + (``jobs.json``, ``.tick.lock``) — it does NOT blanket-protect + everything under ``cron/`` because ``cron/output/`` is disposable. + """ + # Lazily build the set once per process so HERMES_HOME is resolved + # exactly once. + if not _PROTECTED_CRON_PATHS: + hermes_home = get_hermes_home() + for parent in ("cron", "cronjobs"): + base = hermes_home / parent + _PROTECTED_CRON_PATHS.add(str(base)) + _PROTECTED_CRON_PATHS.add(str(base / "jobs.json")) + _PROTECTED_CRON_PATHS.add(str(base / ".tick.lock")) + resolved = str(p.resolve()) + return resolved in _PROTECTED_CRON_PATHS + + def fmt_size(n: float) -> str: for unit in ("B", "KB", "MB", "GB", "TB"): if n < 1024: @@ -226,6 +253,14 @@ def dry_run() -> Tuple[List[Dict], List[Dict]]: cat = item["category"] size = item["size"] + # Re-validate stale "cron-output" entries (fixes #37721). + if cat == "cron-output": + re_cat = guess_category(p) + if re_cat != "cron-output": + # Stale entry — would be skipped by quick(); omit from + # dry-run output too. + continue + if cat == "test": auto.append(item) elif cat == "temp" and age > 7: @@ -269,6 +304,28 @@ def quick() -> Dict[str, Any]: age = (now - datetime.fromisoformat(item["timestamp"])).days + # ---- stale-state migration (fixes #37721) ---- + # Old tracked.json entries may carry a "cron-output" category for + # paths that are NOT under cron/output/ (e.g. cron/jobs.json). + # guess_category() was fixed in #34840, but existing entries are + # never re-validated. Re-classify here so stale entries for cron + # control-plane state are not deleted. + if cat == "cron-output": + re_cat = guess_category(p) + if re_cat != "cron-output": + _log( + f"SKIP stale cron-output entry: {p} " + f"(re-classified as {re_cat!r})" + ) + # Drop the stale entry — it was misclassified. + continue + + # Hard safety net: never delete cron control-plane state even if + # the category somehow slipped through re-validation above. + if _is_protected_cron_path(p): + _log(f"SKIP protected cron path: {p}") + continue + should_delete = ( cat == "test" or (cat == "temp" and age > 7) diff --git a/tests/plugins/test_disk_cleanup_plugin.py b/tests/plugins/test_disk_cleanup_plugin.py index 4f7f66e02..783644d38 100644 --- a/tests/plugins/test_disk_cleanup_plugin.py +++ b/tests/plugins/test_disk_cleanup_plugin.py @@ -170,6 +170,135 @@ class TestGuessCategory: assert dg.guess_category(p) is None +class TestStaleCronEntryMigration: + """Regression tests for #37721 — stale cron-output entries in tracked.json.""" + + def test_quick_skips_stale_cron_output_for_jobs_json(self, _isolate_env): + """A stale tracked.json entry with category="cron-output" for + cron/jobs.json must NOT be deleted by quick(). + + This is the exact scenario from #37721: an old tracked.json has + {"path": ".../cron/jobs.json", "category": "cron-output"} which + would pass the delete filter but must be skipped because + guess_category() now returns None for non-output cron paths. + """ + dg = _load_lib() + cron_dir = _isolate_env / "cron" + cron_dir.mkdir() + jobs_json = cron_dir / "jobs.json" + jobs_json.write_text('{"jobs": []}') + + # Simulate a stale tracked.json entry from before #34840 by + # directly writing the tracked file (track() would reject it). + tracked_file = _isolate_env / "disk-cleanup" / "tracked.json" + tracked_file.parent.mkdir(parents=True, exist_ok=True) + tracked_file.write_text(json.dumps([{ + "path": str(jobs_json), + "category": "cron-output", + "timestamp": "2025-01-01T00:00:00+00:00", # very old + "size": 123, + }])) + + summary = dg.quick() + assert summary["deleted"] == 0, "cron/jobs.json must not be deleted" + assert jobs_json.exists(), "jobs.json must still exist" + # The stale entry should have been dropped from tracking. + remaining = json.loads(tracked_file.read_text()) + assert len(remaining) == 0 + + def test_quick_skips_stale_cron_output_for_cron_dir(self, _isolate_env): + """Stale entry for the cron/ directory itself must not be deleted.""" + dg = _load_lib() + cron_dir = _isolate_env / "cron" + cron_dir.mkdir() + output_dir = cron_dir / "output" + output_dir.mkdir() + (output_dir / "run.md").write_text("x") + + tracked_file = _isolate_env / "disk-cleanup" / "tracked.json" + tracked_file.parent.mkdir(parents=True, exist_ok=True) + tracked_file.write_text(json.dumps([{ + "path": str(cron_dir), + "category": "cron-output", + "timestamp": "2025-01-01T00:00:00+00:00", + "size": 0, + }])) + + summary = dg.quick() + assert summary["deleted"] == 0, "cron/ dir must not be deleted" + assert cron_dir.exists() + + def test_quick_skips_protected_cron_paths_defense_in_depth(self, _isolate_env): + """Defense-in-depth: even if guess_category returned cron-output + (hypothetically), protected cron paths are never deleted.""" + dg = _load_lib() + cron_dir = _isolate_env / "cron" + cron_dir.mkdir() + tick_lock = cron_dir / ".tick.lock" + tick_lock.write_text("") + + # Manually inject a stale entry with "test" category (would normally + # be auto-deleted) — the protected path guard must still block it. + tracked_file = _isolate_env / "disk-cleanup" / "tracked.json" + tracked_file.parent.mkdir(parents=True, exist_ok=True) + tracked_file.write_text(json.dumps([{ + "path": str(tick_lock), + "category": "test", + "timestamp": "2025-01-01T00:00:00+00:00", + "size": 0, + }])) + + summary = dg.quick() + assert summary["deleted"] == 0, ".tick.lock must not be deleted" + assert tick_lock.exists() + + def test_dry_run_omits_stale_cron_output(self, _isolate_env): + """dry_run() should also skip stale cron-output entries.""" + dg = _load_lib() + cron_dir = _isolate_env / "cron" + cron_dir.mkdir() + jobs_json = cron_dir / "jobs.json" + jobs_json.write_text("[]") + + tracked_file = _isolate_env / "disk-cleanup" / "tracked.json" + tracked_file.parent.mkdir(parents=True, exist_ok=True) + tracked_file.write_text(json.dumps([{ + "path": str(jobs_json), + "category": "cron-output", + "timestamp": "2025-01-01T00:00:00+00:00", + "size": 123, + }])) + + auto, prompt = dg.dry_run() + assert len(auto) == 0, "stale cron-output for jobs.json must not appear" + assert len(prompt) == 0 + + def test_legitimate_cron_output_still_deleted(self, _isolate_env): + """A valid cron-output entry under cron/output/ must still be deleted.""" + dg = _load_lib() + output_dir = _isolate_env / "cron" / "output" / "job_1" + output_dir.mkdir(parents=True) + run_md = output_dir / "run.md" + run_md.write_text("x") + + # Old enough to be deleted (>14 days) + from datetime import datetime, timezone, timedelta + old_ts = (datetime.now(timezone.utc) - timedelta(days=20)).isoformat() + + tracked_file = _isolate_env / "disk-cleanup" / "tracked.json" + tracked_file.parent.mkdir(parents=True, exist_ok=True) + tracked_file.write_text(json.dumps([{ + "path": str(run_md), + "category": "cron-output", + "timestamp": old_ts, + "size": 10, + }])) + + summary = dg.quick() + assert summary["deleted"] == 1, "valid old cron-output should be deleted" + assert not run_md.exists() + + class TestTrackForgetQuick: def test_track_then_quick_deletes_test(self, _isolate_env): dg = _load_lib()