Files
hermes-agent/tests/website/test_extract_skills.py
Teknium 59510d7b44 feat(skills): fix browse cap, add source links + copy buttons + category cleanup (#37143)
Skills discovery surfaced ~136 of 88k skills in the CLI and gave community
skills no clickable source on the docs page. Three coupled fixes:

CLI browse:
- hermes skills browse capped at 50 because the per-source limit dict had no
  'hermes-index' key — when the centralized index is available the router
  skips external APIs and serves only the index, so the default-50 fallthrough
  silently truncated the whole hub. Add hermes-index: 5000. Browse now loads
  5367 (269 pages) instead of 136.
- Add an Identifier column + install/inspect hint to the browse table so users
  can act on what they see without a second 'search'.
- Route the TUI browse_skills() helper through parallel_search_sources so it
  inherits the same index-aware source-skip (was double-counting); expose
  identifier in its output.

Docs Skills Hub page:
- Synthesize a sourceUrl for every community skill (github tree URL, clawhub /
  skills.sh / lobehub / browse.sh detail pages), preferring the adapter's
  explicit extra.detail_url/source_url/repo_url. Expanded cards now show
  'View source' for community skills (was nothing) and keep 'View full
  documentation' for built-in/optional. 99% coverage.
- Add a Copy button on the install command.
- Add a loading state instead of flashing '0 skills / No skills found' while
  the 45MB catalog fetches.

Category cleanup:
- _guess_category fell back to tags[0] verbatim, producing ~430 junk one-off
  categories (version strings, brand names: '0.10.7 Dev', 'Doramagic Crystal').
  Now only curated buckets are accepted; unknowns fold into 'Other'. Widen the
  tag->category map so common community tags route to real buckets. 430 -> 173
  categories, top 20 all meaningful.

Tests: tests/website/test_extract_skills.py covers _source_url synthesis +
precedence and _guess_category curation (13 tests). All 27 skills-hub CLI
tests still pass. Docusaurus build verified; expanded cards confirmed in
browser for both community (View source) and built-in (View full docs).
2026-06-01 19:52:28 -07:00

117 lines
4.3 KiB
Python

"""Tests for website/scripts/extract-skills.py helpers.
Covers the two behavioral contracts added when the Skills Hub page gained
per-skill source links and a cleaned-up category sidebar:
1. ``_source_url`` — every community skill must resolve to a clickable
origin URL (explicit ``extra`` URL preferred, else synthesized from the
identifier shape). Built-in/optional skills intentionally return "" because
they have a generated docs page (docsPath) instead.
2. ``_guess_category`` — tags only map to a curated category bucket;
unknown tags fall to ``uncategorized`` (folded into "Other" later) so the
sidebar doesn't fill with one-off junk like version strings or brand
names.
"""
from __future__ import annotations
import importlib.util
from pathlib import Path
import pytest
# This file sits two directories below the repository root
# (<root>/tests/website/), hence parents[2].
REPO_ROOT = Path(__file__).resolve().parents[2]
# Script under test; loaded by file path rather than by a normal import.
EXTRACT = REPO_ROOT.joinpath("website", "scripts", "extract-skills.py")
@pytest.fixture(scope="module")
def mod():
    """Load the script at ``EXTRACT`` as a module, once per test module.

    The script is loaded from its file path under the module name
    ``extract_skills`` and executed so its helpers are available as
    attributes of the returned module object.
    """
    module_spec = importlib.util.spec_from_file_location("extract_skills", EXTRACT)
    # Fail fast with an AssertionError if no usable spec/loader was produced.
    assert module_spec is not None
    loader = module_spec.loader
    assert loader is not None
    loaded = importlib.util.module_from_spec(module_spec)
    loader.exec_module(loaded)
    return loaded
# --------------------------------------------------------------------------
# _source_url
# --------------------------------------------------------------------------
def test_source_url_prefers_explicit_detail_url(mod):
    """An explicit ``extra["detail_url"]`` is returned as the source URL."""
    extra = {"detail_url": "https://skills.sh/owner/repo/skill"}
    resolved = mod._source_url("skills.sh", "skills-sh/owner/repo/skill", extra)
    assert resolved == "https://skills.sh/owner/repo/skill"
def test_source_url_prefers_browse_sh_source_url(mod):
    """An explicit ``extra["source_url"]`` is returned as the source URL."""
    # The browse.sh adapter stores the skill's origin in extra["source_url"].
    extra = {"source_url": "https://airbnb.com/host"}
    resolved = mod._source_url("browse-sh", "browse-sh/airbnb.com/login-abc", extra)
    assert resolved == "https://airbnb.com/host"
def test_source_url_synthesizes_github_tree_url(mod):
    """A github identifier with a sub-path becomes a ``tree/main`` URL."""
    identifier = "anthropics/skills/skills/algorithmic-art"
    assert (
        mod._source_url("github", identifier, {})
        == "https://github.com/anthropics/skills/tree/main/skills/algorithmic-art"
    )
def test_source_url_synthesizes_github_root_when_no_subpath(mod):
    """A bare ``owner/repo`` github identifier maps to the repository root URL."""
    resolved = mod._source_url("github", "owner/repo", {})
    assert resolved == "https://github.com/owner/repo"
def test_source_url_synthesizes_clawhub(mod):
    """An unprefixed clawhub identifier maps to its ``clawhub.ai/skills`` page."""
    resolved = mod._source_url("clawhub", "go-music-skill", {})
    assert resolved == "https://clawhub.ai/skills/go-music-skill"
def test_source_url_synthesizes_clawhub_strips_prefix(mod):
    """A ``clawhub/``-prefixed identifier yields the same URL as the bare slug."""
    # The identifier can already carry the source prefix; the resulting URL
    # must not repeat it.
    resolved = mod._source_url("clawhub", "clawhub/go-music-skill", {})
    assert resolved == "https://clawhub.ai/skills/go-music-skill"
def test_source_url_synthesizes_lobehub(mod):
    """A lobehub identifier maps to its ``lobehub.com/agent`` page."""
    resolved = mod._source_url("lobehub", "lobehub/chinese-paper", {})
    assert resolved == "https://lobehub.com/agent/chinese-paper"
def test_source_url_empty_for_unknown_source_without_identifier(mod):
    """An unrecognised source with an empty identifier yields an empty string."""
    resolved = mod._source_url("mystery", "", {})
    assert resolved == ""
# --------------------------------------------------------------------------
# _guess_category
# --------------------------------------------------------------------------
def test_guess_category_maps_known_tag(mod):
    """Known tags resolve to their curated category bucket."""
    # (tag, expected bucket) pairs, checked in this order.
    expectations = (
        ("security", "security"),
        ("machine-learning", "mlops"),
        ("crypto", "blockchain"),
    )
    for tag, bucket in expectations:
        assert mod._guess_category([tag]) == bucket
def test_guess_category_accepts_literal_curated_key(mod):
    """A tag that is itself a curated category key routes to that category."""
    tags = ["devops"]
    assert mod._guess_category(tags) == "devops"
def test_guess_category_rejects_junk_tag(mod):
    """Version strings and brand names fall into ``uncategorized``."""
    # Core contract: one-off tags must NOT become their own sidebar category;
    # they land in "uncategorized", which is later folded into "Other".
    junk_tags = ("0.10.7 Dev", "Doramagic Crystal", "Ap2")
    for junk in junk_tags:
        assert mod._guess_category([junk]) == "uncategorized"
def test_guess_category_empty_tags(mod):
    """An empty tag list falls into ``uncategorized``."""
    category = mod._guess_category([])
    assert category == "uncategorized"
def test_guess_category_skips_first_junk_tag_for_later_known_tag(mod):
    """A curated tag is still found when an unknown tag precedes it."""
    # The leading tag is junk and the second is curated; the curated one wins.
    tags = ["Some Brand", "security"]
    assert mod._guess_category(tags) == "security"