fix(cron): re-validate stale cron-output entries before deletion (#37721)

quick() and dry_run() previously trusted the stored category from
tracked.json without re-validating at delete time. Stale entries from
before #34840 could carry category="cron-output" for cron control-plane
paths (e.g. cron/jobs.json), causing quick() to delete the live
scheduler registry.

Fix:
- Fix guess_category() to only classify cron/output/** as cron-output
  (was classifying ALL cron/* paths, missing the #34840 fix).
- Re-validate cron-output entries via guess_category() at delete time
  in quick() and dry_run(); stale entries that are no longer classified
  as cron-output are skipped and removed from tracked.json.
- Add _is_protected_cron_path() as a hard defense-in-depth guard that
  blocks deletion of cron/cronjobs directories and known control-plane
  files (jobs.json, .tick.lock) regardless of stored category.
- Update test_cron_subtree_categorised to match fixed guess_category
  (only cron/output/* is cron-output, not all of cron/).

Tests: add 5 regression tests in TestStaleCronEntryMigration.
This commit is contained in:
kyssta-exe
2026-06-03 04:10:57 +00:00
committed by Teknium
parent 693f4c7e9c
commit 30412a9771
2 changed files with 186 additions and 0 deletions

View File

@@ -170,6 +170,135 @@ class TestGuessCategory:
assert dg.guess_category(p) is None
class TestStaleCronEntryMigration:
    """Regression tests for #37721 — stale cron-output entries in tracked.json."""

    @staticmethod
    def _seed_tracked(home, target, category, size,
                      timestamp="2025-01-01T00:00:00+00:00"):
        """Write a one-entry tracked.json under *home* and return its path.

        The file is written directly rather than through the library, so
        the tests can plant entries exactly as an older tracked.json would
        have stored them. Key order matches the hand-written entries these
        tests have always used: path, category, timestamp, size.
        """
        tracked_path = home / "disk-cleanup" / "tracked.json"
        tracked_path.parent.mkdir(parents=True, exist_ok=True)
        entry = {
            "path": str(target),
            "category": category,
            "timestamp": timestamp,
            "size": size,
        }
        tracked_path.write_text(json.dumps([entry]))
        return tracked_path

    def test_quick_skips_stale_cron_output_for_jobs_json(self, _isolate_env):
        """quick() must leave cron/jobs.json alone despite a stale entry.

        This is the exact scenario from #37721: an old tracked.json holds
        {"path": ".../cron/jobs.json", "category": "cron-output"}. Such an
        entry would pass the delete filter, but must be skipped because
        guess_category() now returns None for non-output cron paths.
        """
        dg = _load_lib()

        scheduler_dir = _isolate_env / "cron"
        scheduler_dir.mkdir()
        registry = scheduler_dir / "jobs.json"
        registry.write_text('{"jobs": []}')

        # Simulate a stale tracked.json entry from before #34840 by
        # writing the tracked file directly (track() would reject it).
        # The default timestamp is very old.
        tracked_path = self._seed_tracked(
            _isolate_env, registry, "cron-output", size=123,
        )

        result = dg.quick()

        assert result["deleted"] == 0, "cron/jobs.json must not be deleted"
        assert registry.exists(), "jobs.json must still exist"

        # The stale entry should have been dropped from tracking.
        leftover = json.loads(tracked_path.read_text())
        assert len(leftover) == 0

    def test_quick_skips_stale_cron_output_for_cron_dir(self, _isolate_env):
        """A stale entry naming the cron/ directory itself must not delete it."""
        dg = _load_lib()

        scheduler_dir = _isolate_env / "cron"
        scheduler_dir.mkdir()
        runs_dir = scheduler_dir / "output"
        runs_dir.mkdir()
        (runs_dir / "run.md").write_text("x")

        self._seed_tracked(_isolate_env, scheduler_dir, "cron-output", size=0)

        result = dg.quick()

        assert result["deleted"] == 0, "cron/ dir must not be deleted"
        assert scheduler_dir.exists()

    def test_quick_skips_protected_cron_paths_defense_in_depth(self, _isolate_env):
        """Defense-in-depth: protected cron paths are never deleted.

        This holds even for an entry whose stored category would
        otherwise make it eligible for automatic deletion.
        """
        dg = _load_lib()

        scheduler_dir = _isolate_env / "cron"
        scheduler_dir.mkdir()
        lock_file = scheduler_dir / ".tick.lock"
        lock_file.write_text("")

        # Manually inject a stale entry with "test" category (would normally
        # be auto-deleted) — the protected path guard must still block it.
        self._seed_tracked(_isolate_env, lock_file, "test", size=0)

        result = dg.quick()

        assert result["deleted"] == 0, ".tick.lock must not be deleted"
        assert lock_file.exists()

    def test_dry_run_omits_stale_cron_output(self, _isolate_env):
        """dry_run() should also skip stale cron-output entries."""
        dg = _load_lib()

        scheduler_dir = _isolate_env / "cron"
        scheduler_dir.mkdir()
        registry = scheduler_dir / "jobs.json"
        registry.write_text("[]")

        self._seed_tracked(_isolate_env, registry, "cron-output", size=123)

        auto_items, prompt_items = dg.dry_run()

        assert len(auto_items) == 0, "stale cron-output for jobs.json must not appear"
        assert len(prompt_items) == 0

    def test_legitimate_cron_output_still_deleted(self, _isolate_env):
        """A valid cron-output entry under cron/output/ must still be deleted."""
        from datetime import datetime, timedelta, timezone

        dg = _load_lib()

        job_dir = _isolate_env / "cron" / "output" / "job_1"
        job_dir.mkdir(parents=True)
        report = job_dir / "run.md"
        report.write_text("x")

        # Old enough to be deleted (>14 days)
        twenty_days_ago = datetime.now(timezone.utc) - timedelta(days=20)
        self._seed_tracked(
            _isolate_env,
            report,
            "cron-output",
            size=10,
            timestamp=twenty_days_ago.isoformat(),
        )

        result = dg.quick()

        assert result["deleted"] == 1, "valid old cron-output should be deleted"
        assert not report.exists()
class TestTrackForgetQuick:
def test_track_then_quick_deletes_test(self, _isolate_env):
dg = _load_lib()