fix(skills): fix transaction ordering in reset_bundled_skill and handle read-only files in rmtree

Two related bugs in tools/skills_sync.py affecting Nix-store and
immutable-package installs:

**#34972 — reset_bundled_skill corrupts manifest on rmtree failure:**
The function deleted the manifest entry BEFORE attempting rmtree. If
rmtree failed (read-only files from Nix store), the function returned
early — leaving the skill in a manifest-less limbo state where future
syncs silently skip it forever.

Fix: reorder steps — attempt rmtree FIRST, only delete manifest entry
after rmtree succeeds. If rmtree fails, nothing is changed.

**#34860 — stale .bak directories after sync:**
sync_skills() called shutil.rmtree(backup, ignore_errors=True) which
silently failed on read-only files, leaving persistent .bak dirs.

Fix: add _rmtree_writable() helper that makes files writable via an
onerror callback before retrying removal. Used in both sync_skills()
backup cleanup and reset_bundled_skill().

Fixes #34972
Fixes #34860
This commit is contained in:
annguyenNous
2026-05-30 10:55:24 +07:00
committed by Teknium
parent a57cc00081
commit 83a7d0b601
2 changed files with 60 additions and 13 deletions

View File

@ -845,3 +845,31 @@ class TestResetBundledSkill:
post_manifest = _read_manifest()
assert "google-workspace" in post_manifest
assert (skills_dir / "productivity" / "google-workspace" / "SKILL.md").exists()
def test_reset_restore_preserves_manifest_on_rmtree_failure(self, tmp_path):
"""#34972: when rmtree fails (e.g. read-only Nix-store files), the manifest
entry must NOT be deleted — otherwise the skill enters a limbo state."""
import os, stat
bundled = self._setup_bundled(tmp_path)
skills_dir = tmp_path / "user_skills"
manifest_file = skills_dir / ".bundled_manifest"
dest = skills_dir / "productivity" / "google-workspace"
dest.mkdir(parents=True)
(dest / "SKILL.md").write_text("# user version\n")
# Make directory read-only to simulate Nix-store permissions
os.chmod(dest, stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH)
manifest_file.write_text("google-workspace:STALEHASH000000000000000000000000\n")
with self._patches(bundled, skills_dir, manifest_file):
result = reset_bundled_skill("google-workspace", restore=True)
# Restore failed, but manifest must be preserved
assert result["ok"] is False
assert result["action"] == "not_reset"
assert "Manifest entry preserved" in result["message"]
# Manifest still has the old entry (not deleted)
manifest_after = manifest_file.read_text()
assert "google-workspace" in manifest_after
# Cleanup: restore permissions for tmp_path removal
os.chmod(dest, stat.S_IRWXU)

View File

@ -517,7 +517,10 @@ def sync_skills(quiet: bool = False) -> dict:
if not quiet:
print(f"{skill_name} (updated)")
# Remove backup after successful copy
shutil.rmtree(backup, ignore_errors=True)
try:
_rmtree_writable(backup)
except (OSError, IOError):
logger.debug("Could not remove backup %s", backup, exc_info=True)
except (OSError, IOError):
# Restore from backup
if backup.exists() and not dest.exists():
@ -563,6 +566,21 @@ def sync_skills(quiet: bool = False) -> dict:
}
def _rmtree_writable(path: Path) -> None:
"""Remove a directory tree, making read-only files writable first.
Handles immutable package sources (Nix store, deb/rpm installs) that
preserve read-only permissions on copied files. See #34860, #34972.
"""
def _on_error(func, fpath, exc_info):
# Make the file/directory writable and retry
import stat
os.chmod(fpath, stat.S_IWRITE)
func(fpath)
shutil.rmtree(path, onerror=_on_error)
def reset_bundled_skill(name: str, restore: bool = False) -> dict:
"""
Reset a bundled skill's manifest tracking so future syncs work normally.
@ -606,12 +624,9 @@ def reset_bundled_skill(name: str, restore: bool = False) -> dict:
"synced": None,
}
# Step 1: drop the manifest entry so next sync treats it as new
if in_manifest:
del manifest[name]
_write_manifest(manifest)
# Step 2 (optional): delete the user's copy so next sync re-copies bundled
# Step 1 (optional): delete the user's copy so next sync re-copies bundled.
# Must happen BEFORE manifest deletion so that a failed rmtree does not
# leave the skill in a manifest-less limbo state (see #34972).
deleted_user_copy = False
if restore:
if not is_bundled:
@ -619,28 +634,32 @@ def reset_bundled_skill(name: str, restore: bool = False) -> dict:
"ok": False,
"action": "bundled_missing",
"message": (
f"'{name}' has no bundled source — manifest entry cleared "
f"'{name}' has no bundled source — manifest entry preserved "
f"but cannot restore from bundled (skill was removed upstream)."
),
"synced": None,
}
# The destination mirrors the bundled path relative to bundled_dir.
dest = _compute_relative_dest(bundled_by_name[name], bundled_dir)
if dest.exists():
try:
shutil.rmtree(dest)
_rmtree_writable(dest)
deleted_user_copy = True
except (OSError, IOError) as e:
return {
"ok": False,
"action": "manifest_cleared",
"action": "not_reset",
"message": (
f"Cleared manifest entry for '{name}' but could not "
f"delete user copy at {dest}: {e}"
f"Could not delete user copy at {dest}: {e}. "
f"Manifest entry preserved — nothing was changed."
),
"synced": None,
}
# Step 2: drop the manifest entry so next sync treats it as new
if in_manifest:
del manifest[name]
_write_manifest(manifest)
# Step 3: run sync to re-baseline (or re-copy if we deleted)
synced = sync_skills(quiet=True)