fix(cron): re-validate stale cron-output entries before deletion (#37721)
quick() and dry_run() previously trusted the stored category from tracked.json without re-validating at delete time. Stale entries from before #34840 could carry category="cron-output" for cron control-plane paths (e.g. cron/jobs.json), causing quick() to delete the live scheduler registry. Fix: - Fix guess_category() to only classify cron/output/** as cron-output (was classifying ALL cron/* paths, missing the #34840 fix). - Re-validate cron-output entries via guess_category() at delete time in quick() and dry_run(); stale entries that are no longer classified as cron-output are skipped and removed from tracked.json. - Add _is_protected_cron_path() as a hard defense-in-depth guard that blocks deletion of cron/cronjobs directories and known control-plane files (jobs.json, .tick.lock) regardless of stored category. - Update test_cron_subtree_categorised to match fixed guess_category (only cron/output/* is cron-output, not all of cron/). Tests: add 5 regression tests in TestStaleCronEntryMigration.
This commit is contained in:
@ -170,6 +170,135 @@ class TestGuessCategory:
|
||||
assert dg.guess_category(p) is None
|
||||
|
||||
|
||||
class TestStaleCronEntryMigration:
|
||||
"""Regression tests for #37721 — stale cron-output entries in tracked.json."""
|
||||
|
||||
def test_quick_skips_stale_cron_output_for_jobs_json(self, _isolate_env):
|
||||
"""A stale tracked.json entry with category="cron-output" for
|
||||
cron/jobs.json must NOT be deleted by quick().
|
||||
|
||||
This is the exact scenario from #37721: an old tracked.json has
|
||||
{"path": ".../cron/jobs.json", "category": "cron-output"} which
|
||||
would pass the delete filter but must be skipped because
|
||||
guess_category() now returns None for non-output cron paths.
|
||||
"""
|
||||
dg = _load_lib()
|
||||
cron_dir = _isolate_env / "cron"
|
||||
cron_dir.mkdir()
|
||||
jobs_json = cron_dir / "jobs.json"
|
||||
jobs_json.write_text('{"jobs": []}')
|
||||
|
||||
# Simulate a stale tracked.json entry from before #34840 by
|
||||
# directly writing the tracked file (track() would reject it).
|
||||
tracked_file = _isolate_env / "disk-cleanup" / "tracked.json"
|
||||
tracked_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
tracked_file.write_text(json.dumps([{
|
||||
"path": str(jobs_json),
|
||||
"category": "cron-output",
|
||||
"timestamp": "2025-01-01T00:00:00+00:00", # very old
|
||||
"size": 123,
|
||||
}]))
|
||||
|
||||
summary = dg.quick()
|
||||
assert summary["deleted"] == 0, "cron/jobs.json must not be deleted"
|
||||
assert jobs_json.exists(), "jobs.json must still exist"
|
||||
# The stale entry should have been dropped from tracking.
|
||||
remaining = json.loads(tracked_file.read_text())
|
||||
assert len(remaining) == 0
|
||||
|
||||
def test_quick_skips_stale_cron_output_for_cron_dir(self, _isolate_env):
|
||||
"""Stale entry for the cron/ directory itself must not be deleted."""
|
||||
dg = _load_lib()
|
||||
cron_dir = _isolate_env / "cron"
|
||||
cron_dir.mkdir()
|
||||
output_dir = cron_dir / "output"
|
||||
output_dir.mkdir()
|
||||
(output_dir / "run.md").write_text("x")
|
||||
|
||||
tracked_file = _isolate_env / "disk-cleanup" / "tracked.json"
|
||||
tracked_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
tracked_file.write_text(json.dumps([{
|
||||
"path": str(cron_dir),
|
||||
"category": "cron-output",
|
||||
"timestamp": "2025-01-01T00:00:00+00:00",
|
||||
"size": 0,
|
||||
}]))
|
||||
|
||||
summary = dg.quick()
|
||||
assert summary["deleted"] == 0, "cron/ dir must not be deleted"
|
||||
assert cron_dir.exists()
|
||||
|
||||
def test_quick_skips_protected_cron_paths_defense_in_depth(self, _isolate_env):
|
||||
"""Defense-in-depth: even if guess_category returned cron-output
|
||||
(hypothetically), protected cron paths are never deleted."""
|
||||
dg = _load_lib()
|
||||
cron_dir = _isolate_env / "cron"
|
||||
cron_dir.mkdir()
|
||||
tick_lock = cron_dir / ".tick.lock"
|
||||
tick_lock.write_text("")
|
||||
|
||||
# Manually inject a stale entry with "test" category (would normally
|
||||
# be auto-deleted) — the protected path guard must still block it.
|
||||
tracked_file = _isolate_env / "disk-cleanup" / "tracked.json"
|
||||
tracked_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
tracked_file.write_text(json.dumps([{
|
||||
"path": str(tick_lock),
|
||||
"category": "test",
|
||||
"timestamp": "2025-01-01T00:00:00+00:00",
|
||||
"size": 0,
|
||||
}]))
|
||||
|
||||
summary = dg.quick()
|
||||
assert summary["deleted"] == 0, ".tick.lock must not be deleted"
|
||||
assert tick_lock.exists()
|
||||
|
||||
def test_dry_run_omits_stale_cron_output(self, _isolate_env):
|
||||
"""dry_run() should also skip stale cron-output entries."""
|
||||
dg = _load_lib()
|
||||
cron_dir = _isolate_env / "cron"
|
||||
cron_dir.mkdir()
|
||||
jobs_json = cron_dir / "jobs.json"
|
||||
jobs_json.write_text("[]")
|
||||
|
||||
tracked_file = _isolate_env / "disk-cleanup" / "tracked.json"
|
||||
tracked_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
tracked_file.write_text(json.dumps([{
|
||||
"path": str(jobs_json),
|
||||
"category": "cron-output",
|
||||
"timestamp": "2025-01-01T00:00:00+00:00",
|
||||
"size": 123,
|
||||
}]))
|
||||
|
||||
auto, prompt = dg.dry_run()
|
||||
assert len(auto) == 0, "stale cron-output for jobs.json must not appear"
|
||||
assert len(prompt) == 0
|
||||
|
||||
def test_legitimate_cron_output_still_deleted(self, _isolate_env):
|
||||
"""A valid cron-output entry under cron/output/ must still be deleted."""
|
||||
dg = _load_lib()
|
||||
output_dir = _isolate_env / "cron" / "output" / "job_1"
|
||||
output_dir.mkdir(parents=True)
|
||||
run_md = output_dir / "run.md"
|
||||
run_md.write_text("x")
|
||||
|
||||
# Old enough to be deleted (>14 days)
|
||||
from datetime import datetime, timezone, timedelta
|
||||
old_ts = (datetime.now(timezone.utc) - timedelta(days=20)).isoformat()
|
||||
|
||||
tracked_file = _isolate_env / "disk-cleanup" / "tracked.json"
|
||||
tracked_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
tracked_file.write_text(json.dumps([{
|
||||
"path": str(run_md),
|
||||
"category": "cron-output",
|
||||
"timestamp": old_ts,
|
||||
"size": 10,
|
||||
}]))
|
||||
|
||||
summary = dg.quick()
|
||||
assert summary["deleted"] == 1, "valid old cron-output should be deleted"
|
||||
assert not run_md.exists()
|
||||
|
||||
|
||||
class TestTrackForgetQuick:
|
||||
def test_track_then_quick_deletes_test(self, _isolate_env):
|
||||
dg = _load_lib()
|
||||
|
||||
Reference in New Issue
Block a user