feat(skills): categorize tap skills from skills.sh.json grouping sidecar

A GitHub tap can ship a repo-root skills.sh.json (the published skills.sh schema) declaring category groupings. The Skills Hub now reads it at index time and uses each grouping title as the skill's category label, instead of the tag-derived guess. Generic: any tap that ships the file gets real categorization — NVIDIA's groupings (Inference AI, Decision Optimization, GPU Development, etc.) flow through automatically. - GitHubSource: _get_skillsh_groupings() fetches+caches the sidecar per repo; _parse_skillsh_groupings() flattens it to {skill_name: title}; _list_skills_in_repo() stamps meta.extra['category']; _meta_to_dict now serializes extra so the category survives the index cache round-trip. - extract-skills.py: prefers extra['category'] over the tag heuristic and exempts sidecar categories from the small-category to Other collapse. - Docs + 12 tests.
2026-05-29 12:09:52 -07:00
parent 4de8009ce4
commit b6ed3913d2
4 changed files with 240 additions and 2 deletions
--- a/tests/tools/test_skills_hub.py
+++ b/tests/tools/test_skills_hub.py
@ -70,6 +70,143 @@ class TestParseFrontmatterQuick:
        assert fm == {}
 # ---------------------------------------------------------------------------
 # GitHubSource skills.sh.json grouping sidecar (category support)
 # ---------------------------------------------------------------------------
 class TestSkillsShGroupings:
     """Exercise GitHubSource's ``skills.sh.json`` grouping-sidecar support.

     A tap may publish a repo-root ``skills.sh.json`` that declares category
     groupings.  GitHubSource flattens it into ``{skill_name: title}`` and
     stamps that title onto each SkillMeta's ``extra["category"]``.  The
     mechanism is generic across ecosystems: NVIDIA-style categorization is
     one consumer of it, not a special case.
     """

     @staticmethod
     def _list_repo(src, repo, entries, groupings, meta):
         """Call ``_list_skills_in_repo`` with cache, sidecar, inspect and HTTP stubbed.

         ``entries`` is the JSON payload the contents listing returns,
         ``groupings`` is what the sidecar lookup yields, and ``meta`` is the
         SkillMeta handed back by ``inspect`` for every directory.
         """
         listing = MagicMock()
         listing.status_code = 200
         listing.json.return_value = entries
         with patch.object(src, "_read_cache", return_value=None), \
              patch.object(src, "_write_cache"), \
              patch.object(src, "_get_skillsh_groupings", return_value=groupings), \
              patch.object(src, "inspect", return_value=meta), \
              patch("tools.skills_hub.httpx.get", return_value=listing):
             return src._list_skills_in_repo(repo, "skills/")

     def test_parse_basic_groupings(self):
         document = {
             "$schema": "https://skills.sh/schemas/skills.sh.schema.json",
             "groupings": [
                 {"title": "Inference AI", "skills": ["dynamo-router", "dynamo-recipe"]},
                 {"title": "Decision Optimization", "skills": ["cuopt-developer"]},
             ],
         }
         expected = {
             "dynamo-router": "Inference AI",
             "dynamo-recipe": "Inference AI",
             "cuopt-developer": "Decision Optimization",
         }
         assert GitHubSource._parse_skillsh_groupings(json.dumps(document)) == expected

     def test_parse_invalid_json_returns_none(self):
         parsed = GitHubSource._parse_skillsh_groupings("not json{{")
         assert parsed is None

     def test_parse_non_dict_returns_none(self):
         parsed = GitHubSource._parse_skillsh_groupings("[1, 2, 3]")
         assert parsed is None

     def test_parse_missing_groupings_returns_none(self):
         parsed = GitHubSource._parse_skillsh_groupings('{"foo": 1}')
         assert parsed is None

     def test_parse_empty_groupings_returns_empty_map(self):
         parsed = GitHubSource._parse_skillsh_groupings('{"groupings": []}')
         assert parsed == {}

     def test_parse_tolerates_malformed_group(self):
         # Broken groups are dropped individually; they must not poison the
         # well-formed group that follows them.
         groups = [
             {"title": "X"},                              # no skills -> skipped
             {"skills": ["a"]},                           # no title -> skipped
             {"title": "Y", "skills": ["b", 5, None]},    # only valid string members kept
         ]
         payload = json.dumps({"groupings": groups})
         assert GitHubSource._parse_skillsh_groupings(payload) == {"b": "Y"}

     def test_parse_first_grouping_wins_on_duplicate(self):
         groups = [
             {"title": "First", "skills": ["dup"]},
             {"title": "Second", "skills": ["dup"]},
         ]
         payload = json.dumps({"groupings": groups})
         assert GitHubSource._parse_skillsh_groupings(payload) == {"dup": "First"}

     def test_get_groupings_caches_per_repo(self):
         src = GitHubSource(auth=MagicMock())
         payload = json.dumps({"groupings": [{"title": "T", "skills": ["s"]}]})
         with patch.object(src, "_fetch_file_content", return_value=payload) as fetch:
             results = [src._get_skillsh_groupings("acme/skills") for _ in range(2)]
         assert results == [{"s": "T"}, {"s": "T"}]
         # The repeat lookup has to be served from the per-repo cache rather
         # than triggering another GitHub fetch.
         fetch.assert_called_once_with("acme/skills", "skills.sh.json")

     def test_get_groupings_no_sidecar_returns_none_and_caches(self):
         src = GitHubSource(auth=MagicMock())
         with patch.object(src, "_fetch_file_content", return_value=None) as fetch:
             outcomes = (
                 src._get_skillsh_groupings("acme/skills"),
                 src._get_skillsh_groupings("acme/skills"),
             )
         assert outcomes == (None, None)
         fetch.assert_called_once()

     def test_list_skills_stamps_category_from_sidecar(self):
         src = GitHubSource(auth=MagicMock())
         skill = SkillMeta(
             name="cuopt-developer",
             description="d",
             source="github",
             identifier="NVIDIA/skills/skills/cuopt-developer",
             trust_level="trusted",
         )
         found = self._list_repo(
             src,
             "NVIDIA/skills",
             [{"type": "dir", "name": "cuopt-developer"}],
             {"cuopt-developer": "Decision Optimization"},
             skill,
         )
         assert len(found) == 1
         assert found[0].extra["category"] == "Decision Optimization"

     def test_list_skills_no_sidecar_leaves_extra_empty(self):
         src = GitHubSource(auth=MagicMock())
         skill = SkillMeta(
             name="foo",
             description="d",
             source="github",
             identifier="acme/skills/skills/foo",
             trust_level="community",
         )
         found = self._list_repo(
             src,
             "acme/skills",
             [{"type": "dir", "name": "foo"}],
             None,
             skill,
         )
         assert len(found) == 1
         assert "category" not in found[0].extra

     def test_meta_to_dict_roundtrip_preserves_extra(self):
         expected_extra = {"category": "Inference AI"}
         original = SkillMeta(
             name="x",
             description="d",
             source="github",
             identifier="acme/skills/x",
             trust_level="trusted",
             extra=dict(expected_extra),
         )
         serialized = GitHubSource._meta_to_dict(original)
         assert serialized["extra"] == expected_extra
         # Rebuilding from the serialized dict mirrors how the index cache
         # is deserialized.
         rebuilt = SkillMeta(**serialized)
         assert rebuilt.extra == expected_extra
 # ---------------------------------------------------------------------------
 # GitHubSource.trust_level_for
 # ---------------------------------------------------------------------------
--- a/tools/skills_hub.py
+++ b/tools/skills_hub.py
@ -420,6 +420,10 @@ class GitHubSource(SkillSource):
        # Per-instance cache: repo -> (default_branch, tree_entries)
        # Survives within a single search/install flow, avoiding redundant API calls.
        self._tree_cache: Dict[str, Tuple[str, List[dict]]] = {}
        # Per-repo cache of the optional skills.sh.json grouping sidecar,
        # mapping skill_name -> human-readable grouping title. ``None`` means
        # "fetched, no sidecar"; a missing key means "not fetched yet".
        self._skillsh_groupings: Dict[str, Optional[Dict[str, str]]] = {}
        # Set when GitHub returns 403 with rate limit exhausted
        self._rate_limited: bool = False
@ -558,6 +562,7 @@ class GitHubSource(SkillSource):
            return []
        skills: List[SkillMeta] = []
        groupings = self._get_skillsh_groupings(repo)
        for entry in entries:
            if entry.get("type") != "dir":
                continue
@ -570,6 +575,10 @@ class GitHubSource(SkillSource):
            skill_identifier = f"{repo}/{prefix}/{dir_name}" if prefix else f"{repo}/{dir_name}"
            meta = self.inspect(skill_identifier)
            if meta:
                if groupings:
                    category = groupings.get(meta.name) or groupings.get(dir_name)
                    if category:
                        meta.extra["category"] = category
                skills.append(meta)
        # Cache the results
@ -772,6 +781,61 @@ class GitHubSource(SkillSource):
            logger.debug("GitHub contents API fetch failed: %s", e)
        return None
    def _get_skillsh_groupings(self, repo: str) -> Optional[Dict[str, str]]:
        """Fetch and parse the repo-root ``skills.sh.json`` grouping sidecar.
        ``skills.sh.json`` is a published cross-ecosystem standard
        (``$schema: https://skills.sh/schemas/skills.sh.schema.json``) that
        lets a tap declare human-readable category groupings for its skills:
            {"groupings": [{"title": "Inference AI", "skills": ["dynamo-..."]}]}
        We flatten it into ``{skill_name: grouping_title}`` so the Skills Hub
        UI can show a real category pill instead of a tag-derived guess. Any
        tap that ships this file gets categorization for free — this is not
        NVIDIA-specific.
        Returns the map (possibly empty) on success, or ``None`` when the repo
        has no sidecar / it couldn't be parsed. Cached per-repo on the instance.
        """
        if repo in self._skillsh_groupings:
            return self._skillsh_groupings[repo]
        content = self._fetch_file_content(repo, "skills.sh.json")
        groupings = self._parse_skillsh_groupings(content) if content else None
        self._skillsh_groupings[repo] = groupings
        return groupings
    @staticmethod
    def _parse_skillsh_groupings(content: str) -> Optional[Dict[str, str]]:
        """Flatten a ``skills.sh.json`` document into ``{skill_name: title}``.
        Returns ``None`` when the content isn't a usable grouping document.
        """
        try:
            data = json.loads(content)
        except (json.JSONDecodeError, TypeError):
            return None
        if not isinstance(data, dict):
            return None
        groupings = data.get("groupings")
        if not isinstance(groupings, list):
            return None
        mapping: Dict[str, str] = {}
        for group in groupings:
            if not isinstance(group, dict):
                continue
            title = group.get("title")
            members = group.get("skills")
            if not isinstance(title, str) or not isinstance(members, list):
                continue
            for member in members:
                if isinstance(member, str) and member:
                    # First grouping wins if a skill is listed twice.
                    mapping.setdefault(member, title)
        return mapping
    def _read_cache(self, key: str) -> Optional[list]:
        """Read cached index if not expired."""
        cache_file = INDEX_CACHE_DIR / f"{key}.json"
@ -805,6 +869,7 @@ class GitHubSource(SkillSource):
            "repo": meta.repo,
            "path": meta.path,
            "tags": meta.tags,
            "extra": meta.extra,
        }
    @staticmethod
--- a/website/docs/user-guide/features/skills.md
+++ b/website/docs/user-guide/features/skills.md
@ -477,6 +477,25 @@ hermes skills install openai/skills/k8s
 hermes skills tap add myorg/skills-repo
 ```
 **Category groupings (`skills.sh.json`).** A GitHub tap may ship a
 `skills.sh.json` file at its repo root following the
 [skills.sh schema](https://skills.sh/schemas/skills.sh.schema.json). Its
 `groupings` (each with a `title` and a list of skill names) are read at index
 time, and each grouping's `title` becomes the category label shown on the
 [Skills Hub](https://hermes-agent.nousresearch.com/docs) page instead of a
 tag-derived guess. Entries are matched against a skill's name or its
 directory name; if a skill is listed in more than one grouping, the first
 one wins. This is generic: any tap that ships the file gets real
 categorization, no Hermes-side changes required.
 ```json
 {
  "$schema": "https://skills.sh/schemas/skills.sh.schema.json",
  "groupings": [
    { "title": "Inference AI", "skills": ["dynamo-recipe-runner", "dynamo-router-sla"] },
    { "title": "Decision Optimization", "skills": ["cuopt-developer", "cuopt-install"] }
  ]
 }
 ```
 #### 5. ClawHub (`clawhub`)
 A third-party skills marketplace integrated as a community source.
--- a/website/scripts/extract-skills.py
+++ b/website/scripts/extract-skills.py
@ -343,6 +343,15 @@ def extract_unified_index_skills():
        category = _guess_category(tags)
        extra = entry.get("extra", {}) or {}
        # A skills.sh.json grouping sidecar (if the tap ships one) gives us a
        # real, human-readable category — prefer it over the tag heuristic.
        # extra["category"] holds the grouping title, e.g. "Inference AI".
        sidecar_category = extra.get("category") if isinstance(extra, dict) else None
        category_label_override = ""
        if isinstance(sidecar_category, str) and sidecar_category.strip():
            category_label_override = sidecar_category.strip()
            category = category_label_override.lower().replace(" ", "-")
        # Author hint from extras when available (skills.sh has installs;
        # clawhub doesn't expose author).
        author = ""
@ -358,7 +367,8 @@ def extract_unified_index_skills():
            "description": description,
            "overview": "",
            "category": category,
-            "categoryLabel": "",  # filled in _consolidate_small_categories
+            "categoryLabel": category_label_override,  # set from sidecar, else filled in _consolidate_small_categories
            "fixedCategory": bool(category_label_override),  # sidecar categories are exempt from small-cat collapse
            "source": source_label,
            "tags": tags,
            "platforms": [],
@ -491,10 +501,17 @@ def _consolidate_small_categories(skills: list) -> list:
            s["category"] = "other"
            s["categoryLabel"] = "Other"
-    counts = Counter(s["category"] for s in skills)
+    # Skills with a sidecar-declared category (skills.sh.json grouping) keep
    # their category even if it's the only skill in it — the tap explicitly
    # chose that label, so it's not a heuristic guess to collapse away.
    counts = Counter(
        s["category"] for s in skills if not s.get("fixedCategory")
    )
    small_cats = {cat for cat, n in counts.items() if n < MIN_CATEGORY_SIZE}
    for s in skills:
        if s.get("fixedCategory"):
            continue
        if s["category"] in small_cats:
            s["category"] = "other"
            s["categoryLabel"] = "Other"