feat(skills): fix browse cap, add source links + copy buttons + category cleanup (#37143)

Skills discovery surfaced ~136 of 88k skills in the CLI and gave community
skills no clickable source on the docs page. Three coupled fixes:

CLI browse:
- hermes skills browse capped at 50 because the per-source limit dict had no
  'hermes-index' key — when the centralized index is available the router
  skips external APIs and serves only the index, so the default-50 fallthrough
  silently truncated the whole hub. Add hermes-index: 5000. Browse now loads
  5367 (269 pages) instead of 136.
- Add an Identifier column + install/inspect hint to the browse table so users
  can act on what they see without a second 'search'.
- Route the TUI browse_skills() helper through parallel_search_sources so it
  inherits the same index-aware source-skip (was double-counting); expose
  identifier in its output.

Docs Skills Hub page:
- Synthesize a sourceUrl for every community skill (github tree URL, clawhub /
  skills.sh / lobehub / browse.sh detail pages), preferring the adapter's
  explicit extra.detail_url/source_url/repo_url. Expanded cards now show
  'View source' for community skills (was nothing) and keep 'View full
  documentation' for built-in/optional. 99% coverage.
- Add a Copy button on the install command.
- Add a loading state instead of flashing '0 skills / No skills found' while
  the 45MB catalog fetches.

Category cleanup:
- _guess_category fell back to tags[0] verbatim, producing ~430 junk one-off
  categories (version strings, brand names: '0.10.7 Dev', 'Doramagic Crystal').
  Now only curated buckets are accepted; unknowns fold into 'Other'. Widen the
  tag->category map so common community tags route to real buckets. 430 -> 173
  categories, top 20 all meaningful.

Tests: tests/website/test_extract_skills.py covers _source_url synthesis +
precedence and _guess_category curation (13 tests). All 27 skills-hub CLI
tests still pass. Docusaurus build verified; expanded cards confirmed in
browser for both community (View source) and built-in (View full docs).
This commit is contained in:
Teknium
2026-06-01 19:52:28 -07:00
committed by GitHub
parent 0cd5867bbb
commit 59510d7b44
5 changed files with 399 additions and 43 deletions

View File

@ -335,7 +335,14 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
# Collect results from all (or filtered) sources in parallel.
# Per-source limits are generous — parallelism + 30s timeout cap prevents hangs.
_TRUST_RANK = {"builtin": 3, "trusted": 2, "community": 1}
# NOTE: when the centralized index is available, parallel_search_sources
# skips the external API sources and serves everything from "hermes-index".
# That source MUST therefore carry a high limit, or browse silently caps
# the entire hub at the default (50) — it shipped that way and surfaced
# ~136 of 88k skills. The external-source limits below only apply when the
# index is unavailable (offline / first run before the cache populates).
_PER_SOURCE_LIMIT = {
"hermes-index": 5000,
"official": 200, "skills-sh": 200, "well-known": 50,
"github": 200, "clawhub": 500, "claude-marketplace": 100,
"lobehub": 500, "browse-sh": 500,
@ -396,18 +403,22 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
# Build table
table = Table(show_header=True, header_style="bold")
table.add_column("#", style="dim", width=4, justify="right")
table.add_column("Name", style="bold cyan", max_width=25)
table.add_column("Description", max_width=50)
table.add_column("Name", style="bold cyan", max_width=22)
table.add_column("Description", max_width=44)
table.add_column("Source", style="dim", width=12)
table.add_column("Trust", width=10)
# The identifier is what you pass to `hermes skills install`. Browse used
# to omit it entirely, so users couldn't act on what they saw without a
# second `search`. overflow="fold" keeps long slugs copy-pasteable.
table.add_column("Identifier", style="dim", overflow="fold", no_wrap=False)
for i, r in enumerate(page_items, start=start + 1):
trust_style = {"builtin": "bright_cyan", "trusted": "green",
"community": "yellow"}.get(r.trust_level, "dim")
trust_label = "★ official" if r.source == "official" else r.trust_level
desc = r.description[:50]
if len(r.description) > 50:
desc = r.description[:44]
if len(r.description) > 44:
desc += "..."
table.add_row(
@ -416,6 +427,7 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
desc,
r.source,
f"[{trust_style}]{trust_label}[/]",
r.identifier,
)
c.print(table)
@ -439,7 +451,9 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
c.print(f" [yellow]⚡ Slow sources skipped: {', '.join(timed_out)} "
f"— run again for cached results[/]")
c.print("[dim]Tip: 'hermes skills search <query>' searches deeper across all registries[/]\n")
c.print("[dim]Tip: 'hermes skills inspect <identifier>' to preview, "
"'hermes skills install <identifier>' to install, "
"'hermes skills search <query>' to search deeper[/]\n")
def do_install(identifier: str, category: str = "", force: bool = False,
@ -725,24 +739,27 @@ def browse_skills(page: int = 1, page_size: int = 20, source: str = "all") -> di
Returns ``{"items": [...], "page": int, "total_pages": int, "total": int}``.
"""
from tools.skills_hub import GitHubAuth, create_source_router
from tools.skills_hub import (
GitHubAuth, create_source_router, parallel_search_sources,
)
page_size = max(1, min(page_size, 100))
_TRUST_RANK = {"builtin": 3, "trusted": 2, "community": 1}
_PER_SOURCE_LIMIT = {"official": 100, "skills-sh": 100, "well-known": 25, "github": 100, "clawhub": 50,
# "hermes-index" must carry a high limit: when the index is available the
# router skips external API sources and serves everything from it, so a
# low cap here silently truncates the whole hub (see do_browse note).
_PER_SOURCE_LIMIT = {"hermes-index": 5000, "official": 100, "skills-sh": 100,
"well-known": 25, "github": 100, "clawhub": 50,
"claude-marketplace": 50, "lobehub": 50, "browse-sh": 500}
auth = GitHubAuth()
sources = create_source_router(auth)
all_results: list = []
for src in sources:
sid = src.source_id()
if source != "all" and sid != source and sid != "official":
continue
try:
limit = _PER_SOURCE_LIMIT.get(sid, 50)
all_results.extend(src.search("", limit=limit))
except Exception:
continue
# Delegate to the shared parallel walker so this inherits the index-aware
# source-skip logic — querying hermes-index AND the external APIs at once
# would double-count every skill.
all_results, _counts, _timed_out = parallel_search_sources(
sources, query="", per_source_limits=_PER_SOURCE_LIMIT,
source_filter=source, overall_timeout=30,
)
if not all_results:
return {"items": [], "page": 1, "total_pages": 1, "total": 0}
seen: dict = {}
@ -759,7 +776,7 @@ def browse_skills(page: int = 1, page_size: int = 20, source: str = "all") -> di
page_items = deduped[start : min(start + page_size, total)]
return {
"items": [{"name": r.name, "description": r.description, "source": r.source,
"trust": r.trust_level} for r in page_items],
"trust": r.trust_level, "identifier": r.identifier} for r in page_items],
"page": page,
"total_pages": total_pages,
"total": total,

View File

@ -0,0 +1,116 @@
"""Tests for website/scripts/extract-skills.py helpers.
Covers the two behavioral contracts added when the Skills Hub page gained
per-skill source links and a cleaned-up category sidebar:
1. ``_source_url`` — every community skill must resolve to a clickable
origin URL (explicit ``extra`` URL preferred, else synthesized from the
identifier shape). Built-in/optional skills intentionally return ""
because they have a generated docs page (docsPath) instead.
2. ``_guess_category`` — tags only map to a curated category bucket;
unknown tags fall to ``uncategorized`` (folded into "Other" later) so the
sidebar doesn't fill with one-off junk like version strings or brand
names.
"""
from __future__ import annotations
import importlib.util
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[2]
EXTRACT = REPO_ROOT / "website" / "scripts" / "extract-skills.py"
@pytest.fixture(scope="module")
def mod():
    """Load ``extract-skills.py`` from its file path and return the module.

    The script name contains a hyphen, so it cannot be imported with a plain
    ``import`` statement; it is executed once per test module instead.
    """
    module_spec = importlib.util.spec_from_file_location("extract_skills", EXTRACT)
    assert module_spec is not None
    loader = module_spec.loader
    assert loader is not None
    loaded = importlib.util.module_from_spec(module_spec)
    loader.exec_module(loaded)
    return loaded
# --------------------------------------------------------------------------
# _source_url
# --------------------------------------------------------------------------
def test_source_url_prefers_explicit_detail_url(mod):
    """An ``extra["detail_url"]`` is returned verbatim."""
    explicit = "https://skills.sh/owner/repo/skill"
    resolved = mod._source_url(
        "skills.sh", "skills-sh/owner/repo/skill", {"detail_url": explicit}
    )
    assert resolved == explicit
def test_source_url_prefers_browse_sh_source_url(mod):
    """An ``extra["source_url"]`` is returned verbatim."""
    # browse.sh adapter carries its origin under extra["source_url"].
    origin = "https://airbnb.com/host"
    resolved = mod._source_url(
        "browse-sh", "browse-sh/airbnb.com/login-abc", {"source_url": origin}
    )
    assert resolved == origin
def test_source_url_synthesizes_github_tree_url(mod):
    """``owner/repo/<path>`` becomes a ``tree/main`` URL on GitHub."""
    expected = "https://github.com/anthropics/skills/tree/main/skills/algorithmic-art"
    resolved = mod._source_url(
        "github", "anthropics/skills/skills/algorithmic-art", {}
    )
    assert resolved == expected
def test_source_url_synthesizes_github_root_when_no_subpath(mod):
    """A bare ``owner/repo`` identifier links to the repository root."""
    resolved = mod._source_url("github", "owner/repo", {})
    assert resolved == "https://github.com/owner/repo"
def test_source_url_synthesizes_clawhub(mod):
    """A bare clawhub slug maps to its ``clawhub.ai/skills`` page."""
    resolved = mod._source_url("clawhub", "go-music-skill", {})
    assert resolved == "https://clawhub.ai/skills/go-music-skill"
def test_source_url_synthesizes_clawhub_strips_prefix(mod):
    """A ``clawhub/``-prefixed identifier yields the same URL as the bare slug."""
    # identifier may arrive already prefixed; we must not double-prefix.
    expected = "https://clawhub.ai/skills/go-music-skill"
    resolved = mod._source_url("clawhub", "clawhub/go-music-skill", {})
    assert resolved == expected
def test_source_url_synthesizes_lobehub(mod):
    """A ``lobehub/``-prefixed identifier maps to its ``lobehub.com/agent`` page."""
    resolved = mod._source_url("lobehub", "lobehub/chinese-paper", {})
    assert resolved == "https://lobehub.com/agent/chinese-paper"
def test_source_url_empty_for_unknown_source_without_identifier(mod):
    """With no explicit URL and no identifier, the result is the empty string."""
    resolved = mod._source_url("mystery", "", {})
    assert resolved == ""
# --------------------------------------------------------------------------
# _guess_category
# --------------------------------------------------------------------------
def test_guess_category_maps_known_tag(mod):
    """Known tags resolve to their curated category bucket."""
    expected_bucket = {
        "security": "security",
        "machine-learning": "mlops",
        "crypto": "blockchain",
    }
    for tag, bucket in expected_bucket.items():
        assert mod._guess_category([tag]) == bucket
def test_guess_category_accepts_literal_curated_key(mod):
    """A tag equal to a curated category key routes to that category."""
    # A skill tagged literally with a curated category key should route there.
    guessed = mod._guess_category(["devops"])
    assert guessed == "devops"
def test_guess_category_rejects_junk_tag(mod):
    """Tags outside the curated set resolve to ``"uncategorized"``."""
    # This is the whole point: version strings / brand names must NOT become
    # their own sidebar category. They land in "uncategorized" → "Other".
    for junk in ("0.10.7 Dev", "Doramagic Crystal", "Ap2"):
        assert mod._guess_category([junk]) == "uncategorized"
def test_guess_category_empty_tags(mod):
    """An empty tag list resolves to ``"uncategorized"``."""
    guessed = mod._guess_category([])
    assert guessed == "uncategorized"
def test_guess_category_skips_first_junk_tag_for_later_known_tag(mod):
    """A curated tag is still found when it is not the first tag."""
    # First tag is junk, second is curated — we should still find the curated one.
    tags = ["Some Brand", "security"]
    assert mod._guess_category(tags) == "security"

View File

@ -48,7 +48,7 @@ CATEGORY_LABELS = {
"data-science": "Data Science",
"devops": "DevOps",
"dogfood": "Dogfood",
"domain": "Domain",
"domain": "Business & Finance",
"email": "Email",
"gaming": "Gaming",
"gifs": "GIFs",
@ -193,6 +193,60 @@ def _install_command(source: str, identifier: str, name: str) -> str:
return f"hermes skills install {identifier}"
def _strip_source_prefix(identifier: str, prefix: str) -> str:
    """Return ``identifier`` without a leading ``prefix``, if it has one."""
    return identifier[len(prefix):] if identifier.startswith(prefix) else identifier


def _source_url(source: str, identifier: str, extra: dict) -> str:
    """Best-effort clickable URL to the skill's origin (repo / detail page).

    Community skills have no generated docs page, so without this the
    expanded card on the Skills Hub gives users nowhere to go to read the
    actual SKILL.md before installing.

    Resolution order:

    1. The first of ``extra["detail_url"]``, ``extra["source_url"]``,
       ``extra["repo_url"]``, ``extra["url"]``, ``extra["index_url"]`` that
       is a string starting with ``http://`` or ``https://`` (scheme matched
       case-insensitively). The value is returned unchanged.
    2. Otherwise a URL synthesized from ``source`` (matched
       case-insensitively) and the shape of ``identifier``.

    Args:
        source: Source label of the skill (e.g. ``"github"``, ``"clawhub"``).
        identifier: Skill identifier, with or without its source prefix.
        extra: Adapter-provided metadata; a falsy value is treated as empty.

    Returns:
        The resolved URL, or ``""`` when nothing can be resolved.
    """
    extra = extra or {}
    for key in ("detail_url", "source_url", "repo_url", "url", "index_url"):
        val = extra.get(key)
        # Require the full scheme separator: a bare ``startswith("http")``
        # would accept non-URLs such as "httpfoo".
        if isinstance(val, str) and val.lower().startswith(("http://", "https://")):
            return val

    if not identifier:
        return ""

    src = (source or "").lower()

    # GitHub-backed taps (openai/anthropic/nvidia/hf/gstack/VoltAgent/...):
    # identifier is "owner/repo/<path...>" — link to the directory on GitHub.
    if src in {"github", "openai", "anthropic", "huggingface", "nvidia",
               "gstack", "voltagent", "minimax", "claude marketplace",
               "claude-marketplace"}:
        parts = [p for p in identifier.split("/") if p]
        if len(parts) < 2:
            return ""
        owner, repo, *subpath = parts
        base = f"https://github.com/{owner}/{repo}"
        if not subpath:
            return base
        # The branch name is hard-coded to "main".
        return f"{base}/tree/main/{'/'.join(subpath)}"

    if src == "clawhub":
        # identifier is a bare slug (the "clawhub/" prefix is added at install time)
        return f"https://clawhub.ai/skills/{_strip_source_prefix(identifier, 'clawhub/')}"

    if src in {"skills.sh", "skills-sh"}:
        # "skills-sh/owner/repo/skill" -> the skills.sh detail page
        # NOTE(review): adapter-supplied detail URLs appear to have the shape
        # https://skills.sh/owner/repo/skill (no "/skills/" segment) — confirm
        # that this synthesized form resolves.
        return f"https://skills.sh/skills/{_strip_source_prefix(identifier, 'skills-sh/')}"

    if src == "lobehub":
        return f"https://lobehub.com/agent/{_strip_source_prefix(identifier, 'lobehub/')}"

    if src in {"browse.sh", "browse-sh"}:
        # "browse-sh/<hostname>/<task-id>" -> browse.sh task page
        return f"https://browse.sh/skills/{_strip_source_prefix(identifier, 'browse-sh/')}"

    return ""
def extract_local_skills():
skills = []
@ -361,6 +415,7 @@ def extract_unified_index_skills():
author = repo.split("/")[0]
install_cmd = _install_command(source_id, identifier, name)
source_url = _source_url(source_id, identifier, extra)
out.append({
"name": name,
@ -380,6 +435,7 @@ def extract_unified_index_skills():
"docsPath": "",
"identifier": identifier,
"installCmd": install_cmd,
"sourceUrl": source_url,
})
return out, meta
@ -460,26 +516,60 @@ for _cat, _tags in {
"software-development": [
"programming", "code", "coding", "software-development",
"frontend-development", "backend-development", "web-development",
"react", "python", "typescript", "java", "rust",
"react", "python", "typescript", "java", "rust", "cli",
"developer-tools", "development", "api", "database", "debugging",
"documentation", "testing", "test", "architecture",
],
"creative": ["writing", "design", "creative", "art", "image-generation"],
"research": ["education", "academic", "research"],
"social-media": ["marketing", "seo", "social-media"],
"productivity": ["productivity", "business"],
"data-science": ["data", "data-science"],
"mlops": ["machine-learning", "deep-learning"],
"devops": ["devops"],
"autonomous-ai-agents": [
"ai", "agent", "agents", "ai-agent", "ai-agents", "agentic",
"agentic-ai", "ai-assistant", "assistant", "multi-agent",
"autonomous", "llm", "rag", "prompt", "prompts", "a2a", "acp",
],
"creative": [
"writing", "design", "creative", "art", "image-generation",
"image", "content", "video-editing", "content-creation",
],
"research": ["education", "academic", "academic-writing", "research", "knowledge"],
"social-media": ["marketing", "seo", "social-media", "advertising", "creator"],
"productivity": [
"productivity", "business", "automation", "calendar", "email",
"document", "documents", "office", "notes", "note-taking",
"collaboration", "workflow", "crm",
],
"data-science": ["data", "data-science", "analytics", "analysis", "visualization"],
"mlops": ["machine-learning", "deep-learning", "mlops", "training", "fine-tuning"],
"devops": ["devops", "docker", "kubernetes", "infrastructure", "deployment", "monitoring", "ci-cd"],
"gaming": ["gaming", "game", "game-development"],
"media": ["music", "media", "video"],
"health": ["health", "fitness"],
"translation": ["translation", "language-learning"],
"security": ["security", "cybersecurity"],
"media": ["music", "media", "video", "audio", "podcast", "youtube"],
"health": ["health", "fitness", "medical", "wellness"],
"translation": ["translation", "language-learning", "i18n", "localization"],
"security": ["security", "cybersecurity", "auth", "compliance", "audit", "privacy"],
"blockchain": [
"blockchain", "crypto", "cryptocurrency", "defi", "web3",
"bitcoin", "ethereum", "nft", "trading", "arbitrage",
],
"communication": ["communication", "chat", "messaging", "slack", "discord"],
"domain": [
"finance", "accounting", "banking", "ecommerce", "e-commerce",
"shopping", "travel", "booking", "real-estate", "legal",
"government", "b2b", "b2b-sales", "entrepreneur", "budget",
],
}.items():
for _t in _tags:
TAG_TO_CATEGORY[_t] = _cat
def _guess_category(tags: list) -> str:
"""Map a skill's tags to a curated category, or 'uncategorized'.
Previously this fell back to ``tags[0]`` verbatim, which produced
hundreds of junk one-off "categories" in the sidebar (e.g.
"Doramagic Crystal", "0.10.7 Dev", "Ap2") — version strings, brand
names, and tag noise. We now ONLY accept categories that map to a
known curated bucket; everything else becomes "uncategorized", which
_consolidate_small_categories folds into "Other". Sidecar-declared
categories (skills.sh groupings) bypass this entirely via fixedCategory.
"""
if not tags:
return "uncategorized"
for tag in tags:
@ -488,8 +578,12 @@ def _guess_category(tags: list) -> str:
cat = TAG_TO_CATEGORY.get(tag.lower())
if cat:
return cat
first = tags[0] if isinstance(tags[0], str) else ""
return first.lower().replace(" ", "-") if first else "uncategorized"
# Also accept a tag that's already a known curated category key
# (e.g. a skill tagged literally "security" or "devops").
normalized = tag.lower().replace(" ", "-")
if normalized in CATEGORY_LABELS and normalized != "other":
return normalized
return "uncategorized"
MIN_CATEGORY_SIZE = 4

View File

@ -19,6 +19,10 @@ interface Skill {
docsPath?: string;
identifier?: string;
installCmd?: string;
/** Clickable URL to the skill's origin (repo / detail page). Synthesized
* in extract-skills.py for community skills that have no generated docs
* page, so the expanded card always has somewhere to send the user. */
sourceUrl?: string;
/** Lowercase pre-joined haystack used by the search filter.
* Built once at load time so per-keystroke filtering is a single
* `.includes()` per skill instead of array-join + toLowerCase on
@ -240,6 +244,47 @@ function highlightMatch(text: string, query: string): React.ReactNode {
);
}
/**
 * Button that writes `text` to the clipboard and shows a "Copied" label and
 * check icon for 1.5 s after a successful write.
 *
 * If `navigator.clipboard` is unavailable, or the write is rejected, the
 * click is a silent no-op.
 */
function CopyButton({ text }: { text: string }) {
  const [copied, setCopied] = useState(false);

  const onCopy = useCallback(
    (e: React.MouseEvent) => {
      // Keep the click from reaching handlers on enclosing elements.
      e.stopPropagation();
      // Optional chaining short-circuits the whole call chain when the
      // Clipboard API is missing, so `.then` is never reached in that case.
      navigator.clipboard?.writeText(text).then(
        () => {
          setCopied(true);
          setTimeout(() => setCopied(false), 1500);
        },
        () => {},
      );
    },
    [text],
  );

  return (
    <button
      // Explicit type: a <button> defaults to "submit", which would submit an
      // enclosing <form> if this component is ever rendered inside one.
      type="button"
      className={styles.copyBtn}
      onClick={onCopy}
      title="Copy install command"
      aria-label="Copy install command"
    >
      {copied ? (
        <svg viewBox="0 0 20 20" fill="currentColor" width="14" height="14">
          <path
            fillRule="evenodd"
            d="M16.704 4.153a.75.75 0 01.143 1.052l-8 10.5a.75.75 0 01-1.127.075l-4.5-4.5a.75.75 0 011.06-1.06l3.894 3.893 7.48-9.817a.75.75 0 011.05-.143z"
            clipRule="evenodd"
          />
        </svg>
      ) : (
        <svg viewBox="0 0 20 20" fill="currentColor" width="14" height="14">
          <path d="M7 3.5A1.5 1.5 0 018.5 2h3.879a1.5 1.5 0 011.06.44l3.122 3.12A1.5 1.5 0 0117 6.622V12.5a1.5 1.5 0 01-1.5 1.5h-1v-3.379a3 3 0 00-.879-2.121L10.5 5.379A3 3 0 008.379 4.5H7v-1z" />
          <path d="M4.5 6A1.5 1.5 0 003 7.5v9A1.5 1.5 0 004.5 18h7a1.5 1.5 0 001.5-1.5v-5.879a1.5 1.5 0 00-.44-1.06L9.44 6.439A1.5 1.5 0 008.378 6H4.5z" />
        </svg>
      )}
      <span className={styles.copyBtnLabel}>{copied ? "Copied" : "Copy"}</span>
    </button>
  );
}
function SkillCard({
skill,
query,
@ -379,16 +424,31 @@ function SkillCard({
)}
<div className={styles.installHint}>
<code>{skill.installCmd || `hermes skills install ${skill.name}`}</code>
<CopyButton
text={skill.installCmd || `hermes skills install ${skill.name}`}
/>
</div>
<div className={styles.cardLinks}>
{skill.docsPath ? (
<a
className={styles.docsLink}
href={`/docs/user-guide/skills/${skill.docsPath}`}
onClick={(e) => e.stopPropagation()}
>
View full documentation
</a>
) : skill.sourceUrl ? (
<a
className={styles.docsLink}
href={skill.sourceUrl}
target="_blank"
rel="noopener noreferrer"
onClick={(e) => e.stopPropagation()}
>
View source
</a>
) : null}
</div>
{skill.docsPath && (
<a
className={styles.docsLink}
href={`/docs/user-guide/skills/${skill.docsPath}`}
onClick={(e) => e.stopPropagation()}
>
View full documentation
</a>
)}
</div>
)}
</div>
@ -792,7 +852,15 @@ export default function SkillsDashboard() {
</div>
)}
{visible.length > 0 ? (
{!data && !loadError ? (
<div className={styles.empty}>
<div className={styles.loadingSpinner} />
<h3 className={styles.emptyTitle}>Loading the catalog</h3>
<p className={styles.emptyDesc}>
Fetching 88k+ skills across every registry. One moment.
</p>
</div>
) : visible.length > 0 ? (
<>
<div className={styles.grid}>
{visible.map((skill, i) => {

View File

@ -628,6 +628,9 @@
background: rgba(0, 0, 0, 0.25);
border: 1px solid rgba(255, 215, 0, 0.06);
border-radius: 5px;
display: flex;
align-items: center;
gap: 0.5rem;
}
.installHint code {
@ -636,6 +639,64 @@
color: rgba(255, 215, 0, 0.7);
background: none;
padding: 0;
flex: 1;
overflow-x: auto;
white-space: nowrap;
scrollbar-width: none;
}
.installHint code::-webkit-scrollbar {
display: none;
}
.copyBtn {
display: inline-flex;
align-items: center;
gap: 0.25rem;
flex-shrink: 0;
padding: 0.2rem 0.45rem;
border: 1px solid rgba(255, 215, 0, 0.18);
border-radius: 4px;
background: rgba(255, 215, 0, 0.06);
color: rgba(255, 215, 0, 0.85);
font-size: 0.68rem;
font-weight: 600;
cursor: pointer;
transition: all 0.15s;
}
.copyBtn:hover {
background: rgba(255, 215, 0, 0.14);
color: rgba(255, 215, 0, 1);
}
.copyBtnLabel {
line-height: 1;
}
.cardLinks {
display: flex;
gap: 0.5rem;
}
.cardLinks .docsLink {
flex: 1;
}
.loadingSpinner {
width: 2.25rem;
height: 2.25rem;
margin: 0 auto 1rem;
border: 3px solid rgba(255, 215, 0, 0.15);
border-top-color: rgba(255, 215, 0, 0.7);
border-radius: 50%;
animation: skillsSpin 0.8s linear infinite;
}
@keyframes skillsSpin {
to {
transform: rotate(360deg);
}
}
.overviewBlock {