Skills discovery surfaced ~136 of 88k skills in the CLI and gave community skills no clickable source on the docs page. Three coupled fixes:

CLI browse:
- hermes skills browse capped at 50 because the per-source limit dict had no 'hermes-index' key — when the centralized index is available the router skips external APIs and serves only the index, so the default-50 fallthrough silently truncated the whole hub. Add hermes-index: 5000. Browse now loads 5367 (269 pages) instead of 136.
- Add an Identifier column + install/inspect hint to the browse table so users can act on what they see without a second 'search'.
- Route the TUI browse_skills() helper through parallel_search_sources so it inherits the same index-aware source-skip (was double-counting); expose identifier in its output.

Docs Skills Hub page:
- Synthesize a sourceUrl for every community skill (github tree URL, clawhub / skills.sh / lobehub / browse.sh detail pages), preferring the adapter's explicit extra.detail_url/source_url/repo_url. Expanded cards now show 'View source' for community skills (was nothing) and keep 'View full documentation' for built-in/optional. 99% coverage.
- Add a Copy button on the install command.
- Add a loading state instead of flashing '0 skills / No skills found' while the 45MB catalog fetches.

Category cleanup:
- _guess_category fell back to tags[0] verbatim, producing ~430 junk one-off categories (version strings, brand names: '0.10.7 Dev', 'Doramagic Crystal'). Now only curated buckets are accepted; unknowns fold into 'Other'. Widen the tag->category map so common community tags route to real buckets. 430 -> 173 categories, top 20 all meaningful.

Tests: tests/website/test_extract_skills.py covers _source_url synthesis + precedence and _guess_category curation (13 tests). All 27 skills-hub CLI tests still pass. Docusaurus build verified; expanded cards confirmed in browser for both community (View source) and built-in (View full docs).
117 lines
4.3 KiB
Python
117 lines
4.3 KiB
Python
"""Tests for website/scripts/extract-skills.py helpers.
|
|
|
|
Covers the two behavioral contracts added when the Skills Hub page gained
|
|
per-skill source links and a cleaned-up category sidebar:
|
|
|
|
1. ``_source_url`` — every community skill must resolve to a clickable
|
|
origin URL (explicit ``extra`` URL preferred, else synthesized from the
|
|
identifier shape). Built-in/optional skills intentionally return "" —
|
|
they have a generated docs page (docsPath) instead.
|
|
|
|
2. ``_guess_category`` — tags only map to a curated category bucket;
|
|
unknown tags fall to ``uncategorized`` (folded into "Other" later) so the
|
|
sidebar doesn't fill with one-off junk like version strings or brand
|
|
names.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import importlib.util
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
# parents[2] is the directory two levels above the one containing this file;
# the script under test is addressed relative to that root.
REPO_ROOT = Path(__file__).resolve().parents[2]
EXTRACT = REPO_ROOT.joinpath("website", "scripts", "extract-skills.py")
|
|
|
|
|
|
@pytest.fixture(scope="module")
def mod():
    """Load ``extract-skills.py`` from its path and return the module object.

    The script's filename contains a hyphen, so it cannot be brought in with
    a regular ``import`` statement; it is executed from ``EXTRACT`` under the
    module name ``extract_skills`` instead. The fixture is module-scoped, so
    the script is executed once for all tests in this file.
    """
    module_spec = importlib.util.spec_from_file_location("extract_skills", EXTRACT)
    # Fail fast with a plain assertion if no spec or loader could be built.
    assert module_spec is not None and module_spec.loader is not None
    loaded = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(loaded)
    return loaded
|
|
|
|
|
|
# --------------------------------------------------------------------------
|
|
# _source_url
|
|
# --------------------------------------------------------------------------
|
|
|
|
def test_source_url_prefers_explicit_detail_url(mod):
    """An explicit ``extra["detail_url"]`` is returned as the source URL."""
    extra = {"detail_url": "https://skills.sh/owner/repo/skill"}
    resolved = mod._source_url("skills.sh", "skills-sh/owner/repo/skill", extra)
    assert resolved == "https://skills.sh/owner/repo/skill"
|
|
|
|
|
|
def test_source_url_prefers_browse_sh_source_url(mod):
    """An explicit ``extra["source_url"]`` is returned for a browse-sh skill."""
    # The browse.sh adapter stores its origin under extra["source_url"].
    extra = {"source_url": "https://airbnb.com/host"}
    resolved = mod._source_url("browse-sh", "browse-sh/airbnb.com/login-abc", extra)
    assert resolved == "https://airbnb.com/host"
|
|
|
|
|
|
def test_source_url_synthesizes_github_tree_url(mod):
    """A GitHub identifier with a sub-path resolves to a ``tree/main`` URL."""
    identifier = "anthropics/skills/skills/algorithmic-art"
    expected = "https://github.com/anthropics/skills/tree/main/skills/algorithmic-art"
    assert mod._source_url("github", identifier, {}) == expected
|
|
|
|
|
|
def test_source_url_synthesizes_github_root_when_no_subpath(mod):
    """A bare ``owner/repo`` GitHub identifier resolves to the repo root URL."""
    resolved = mod._source_url("github", "owner/repo", {})
    assert resolved == "https://github.com/owner/repo"
|
|
|
|
|
|
def test_source_url_synthesizes_clawhub(mod):
    """An unprefixed clawhub identifier resolves to its clawhub.ai skill page."""
    resolved = mod._source_url("clawhub", "go-music-skill", {})
    assert resolved == "https://clawhub.ai/skills/go-music-skill"
|
|
|
|
|
|
def test_source_url_synthesizes_clawhub_strips_prefix(mod):
    """A ``clawhub/``-prefixed identifier yields the same URL as the bare slug."""
    # The identifier may arrive already prefixed; the prefix must not be
    # repeated in the synthesized URL.
    resolved = mod._source_url("clawhub", "clawhub/go-music-skill", {})
    assert resolved == "https://clawhub.ai/skills/go-music-skill"
|
|
|
|
|
|
def test_source_url_synthesizes_lobehub(mod):
    """A lobehub identifier resolves to its lobehub.com agent page."""
    resolved = mod._source_url("lobehub", "lobehub/chinese-paper", {})
    assert resolved == "https://lobehub.com/agent/chinese-paper"
|
|
|
|
|
|
def test_source_url_empty_for_unknown_source_without_identifier(mod):
    """An unrecognized source with an empty identifier yields an empty URL."""
    resolved = mod._source_url("mystery", "", {})
    assert resolved == ""
|
|
|
|
|
|
# --------------------------------------------------------------------------
|
|
# _guess_category
|
|
# --------------------------------------------------------------------------
|
|
|
|
def test_guess_category_maps_known_tag(mod):
    """Known tags map onto the expected curated category bucket."""
    tag_to_bucket = (
        ("security", "security"),
        ("machine-learning", "mlops"),
        ("crypto", "blockchain"),
    )
    for tag, bucket in tag_to_bucket:
        assert mod._guess_category([tag]) == bucket
|
|
|
|
|
|
def test_guess_category_accepts_literal_curated_key(mod):
    """A tag that is itself a curated category key routes to that category."""
    category = mod._guess_category(["devops"])
    assert category == "devops"
|
|
|
|
|
|
def test_guess_category_rejects_junk_tag(mod):
    """Version strings and brand names resolve to ``uncategorized``."""
    # Core contract: junk tags must NOT become their own sidebar category.
    # They land in "uncategorized", which is folded into "Other".
    for junk_tag in ("0.10.7 Dev", "Doramagic Crystal", "Ap2"):
        assert mod._guess_category([junk_tag]) == "uncategorized"
|
|
|
|
|
|
def test_guess_category_empty_tags(mod):
    """An empty tag list resolves to ``uncategorized``."""
    category = mod._guess_category([])
    assert category == "uncategorized"
|
|
|
|
|
|
def test_guess_category_skips_first_junk_tag_for_later_known_tag(mod):
    """A curated tag is still found when it follows a junk tag in the list."""
    # The first tag is junk and the second is curated; the curated one wins.
    tags = ["Some Brand", "security"]
    assert mod._guess_category(tags) == "security"
|