diff --git a/evolve_server/core/skill_registry.py b/evolve_server/core/skill_registry.py index 6633d65..afbaac1 100644 --- a/evolve_server/core/skill_registry.py +++ b/evolve_server/core/skill_registry.py @@ -17,6 +17,7 @@ import hashlib import json import logging +from copy import deepcopy from datetime import datetime, timezone from typing import Any, Optional @@ -106,6 +107,8 @@ def record_update( skill_name: str, content_sha: str, action: str = "create", + *, + bundle_record: Optional[dict[str, Any]] = None, ) -> int: """Record a content-changing update. Returns the new version number. @@ -120,15 +123,28 @@ def record_update( new_version = entry.get("version", 0) + 1 entry["version"] = new_version entry["content_sha"] = content_sha + if isinstance(bundle_record, dict): + for key in ("format", "entrypoint", "tree_sha256"): + if bundle_record.get(key): + entry[key] = bundle_record[key] + files = bundle_record.get("files") + if isinstance(files, list): + entry["files"] = deepcopy(files) history: list = entry.setdefault("history", []) - history.append( - { - "version": new_version, - "content_sha": content_sha, - "timestamp": datetime.now(timezone.utc).isoformat(), - "action": action, - } - ) + history_entry: dict[str, Any] = { + "version": new_version, + "content_sha": content_sha, + "timestamp": datetime.now(timezone.utc).isoformat(), + "action": action, + } + if isinstance(bundle_record, dict): + for key in ("format", "entrypoint", "tree_sha256"): + if bundle_record.get(key): + history_entry[key] = bundle_record[key] + files = bundle_record.get("files") + if isinstance(files, list): + history_entry["files"] = deepcopy(files) + history.append(history_entry) if len(history) > 20: entry["history"] = history[-20:] diff --git a/evolve_server/engines/EVOLVE_AGENTS.md b/evolve_server/engines/EVOLVE_AGENTS.md index e334493..6129d34 100644 --- a/evolve_server/engines/EVOLVE_AGENTS.md +++ b/evolve_server/engines/EVOLVE_AGENTS.md @@ -14,6 +14,9 @@ workspace/ ├── skills/ ← input+output: current skill library │ └── / │ ├── SKILL.md ← current version (refreshed from storage each round) +│ ├── references/ ← optional reference docs / prompts / notes +│ ├── scripts/ ← optional helper scripts / tooling +│ ├── assets/ ← optional templates / binaries / other assets │ └── history/ ← persistent across rounds only in `--no-fresh` mode │ ├── v1.md ← previous SKILL.md snapshot │ ├── v1_evidence.md @@ -30,10 +33,10 @@ workspace/ 2. **Analyze** the sessions: identify patterns, failures, successes, and which skills (if any) were referenced. 3. **Decide** what actions to take for each skill or pattern. -4. **Execute** by writing new or updated `SKILL.md` files in `skills/`. +4. **Execute** by writing new or updated skill bundles in `skills/`. Work through these steps autonomously. Use your file-reading and writing -tools to inspect session data and produce skill files. +tools to inspect session data and produce skill bundles. **File access boundary**: All your file operations MUST stay within this workspace directory. The workspace contains copies of all data you need — @@ -151,10 +154,16 @@ No action needed. Use when: ## Step 4: Execute — Write Skill Files ### For improve_skill / optimize_description: -Edit the existing `skills//SKILL.md` file in place. +Edit the existing `skills//` bundle in place. `SKILL.md` remains the +entrypoint, but you may also update supporting files such as +`references/`, `scripts/`, `assets/`, and `history/` when the evidence +shows the skill needs them. ### For create_skill: -Create a new directory `skills//SKILL.md`. +Create a new directory `skills//SKILL.md`. If the skill needs +supporting resources, you may also create additional files under +`references/`, `scripts/`, `assets/`, or other subdirectories inside the +same skill folder. ### SKILL.md Format diff --git a/evolve_server/engines/agent.py b/evolve_server/engines/agent.py index 5c7120b..d36245b 100644 --- a/evolve_server/engines/agent.py +++ b/evolve_server/engines/agent.py @@ -16,6 +16,11 @@ from typing import Any from skillclaw.object_store import build_object_store +from skillclaw.skill_bundle import ( + bundle_entrypoint_bytes, + bundle_file_records, + bundle_tree_sha256, +) from ..core.config import EvolveServerConfig from ..core.constants import SLUG_RE @@ -30,11 +35,13 @@ from ..storage.mock_bucket import LocalBucket from ..storage.oss_helpers import ( delete_session_keys, - fetch_skill_content, + fetch_skill_bundle, + list_object_keys, list_session_keys, load_manifest, read_json_object, save_manifest, + save_version_bundle, ) from .agent_workspace import AgentWorkspace from .agents_md import load_agents_md @@ -272,39 +279,75 @@ async def _drain_sessions(self) -> tuple[list[dict], list[str]]: def _load_remote_skills(self) -> dict[str, dict[str, Any]]: return load_manifest(self._bucket, self._prefix) - def _fetch_all_skills(self, manifest: dict[str, dict]) -> dict[str, str]: - """Fetch SKILL.md content for all skills in the manifest.""" - skills: dict[str, str] = {} - for name in manifest: - content = fetch_skill_content(self._bucket, self._prefix, name) - if content: - skills[name] = content + def _fetch_all_skills(self, manifest: dict[str, dict]) -> dict[str, dict[str, bytes]]: + """Fetch full bundle content for all skills in the manifest.""" + skills: dict[str, dict[str, bytes]] = {} + for name, record in manifest.items(): + bundle = fetch_skill_bundle(self._bucket, self._prefix, name, record) + if bundle: + skills[name] = bundle return skills # ================================================================= # # Upload evolved skills # # ================================================================= # - def _upload_skill(self, skill: dict, action: str = "create") -> None: + def _upload_skill( + self, + skill: dict, + bundle_files: dict[str, bytes], + action: str = "create", + ) -> None: name = skill.get("name", "") if not name: return skill_id = self._id_registry.get_or_create(name) - md_content = build_skill_md(skill) + if "SKILL.md" not in bundle_files: + bundle_files = {**bundle_files, "SKILL.md": build_skill_md(skill).encode("utf-8")} + md_bytes = bundle_entrypoint_bytes(bundle_files) object_key = f"{self._prefix}skills/{name}/SKILL.md" - self._bucket.put_object(object_key, md_content.encode("utf-8")) - - content_sha = hashlib.sha256(md_content.encode("utf-8")).hexdigest() - version = self._id_registry.record_update(name, content_sha, action=action) + self._bucket.put_object(object_key, md_bytes) + keep_bundle_keys: set[str] = set() + for rel_path, data in sorted(bundle_files.items()): + if rel_path == "SKILL.md": + continue + key = f"{self._prefix}skills/{name}/files/{rel_path}" + keep_bundle_keys.add(key) + self._bucket.put_object(key, data) + + for key in list_object_keys(self._bucket, f"{self._prefix}skills/{name}/files/"): + if key not in keep_bundle_keys: + self._bucket.delete_object(key) + + content_sha = hashlib.sha256(md_bytes).hexdigest() + tree_sha = bundle_tree_sha256(bundle_files) + bundle_record = { + "format": "bundle_v1", + "entrypoint": "SKILL.md", + "tree_sha256": tree_sha, + "files": bundle_file_records(bundle_files), + } + version = self._id_registry.record_update( + name, + content_sha, + action=action, + bundle_record=bundle_record, + ) + save_version_bundle(self._bucket, self._prefix, name, version, bundle_files) manifest = self._load_remote_skills() manifest[name] = { + **manifest.get(name, {}), "name": name, "skill_id": skill_id, "version": version, "sha256": content_sha, + "tree_sha256": tree_sha, + "format": "bundle_v1", + "entrypoint": "SKILL.md", + "files": bundle_record["files"], "uploaded_by": "evolve_server", "uploaded_at": datetime.now(timezone.utc).isoformat(), "description": skill.get("description", ""), @@ -412,7 +455,12 @@ async def run_once(self) -> dict: skill["name"] = name try: - await self._call_storage(self._upload_skill, skill, action) + await self._call_storage( + self._upload_skill, + skill, + change.get("bundle_files", {}), + action, + ) skills_evolved += 1 evolution_records.append( { diff --git a/evolve_server/engines/agent_workspace.py b/evolve_server/engines/agent_workspace.py index fe42f75..e98e1fe 100644 --- a/evolve_server/engines/agent_workspace.py +++ b/evolve_server/engines/agent_workspace.py @@ -3,7 +3,7 @@ Handles preparing the local workspace directory that OpenClaw operates on, snapshotting skill state before agent execution, and collecting changes -(new / modified SKILL.md files) after the agent finishes. +(new / modified skill bundles) after the agent finishes. Key design note on OpenClaw bootstrap integration: OpenClaw's ``ensureAgentWorkspace()`` creates template bootstrap files @@ -15,13 +15,19 @@ from __future__ import annotations -import hashlib import json import logging import shutil from pathlib import Path from typing import Any +from skillclaw.skill_bundle import ( + bundle_entrypoint_text, + bundle_tree_sha256, + read_skill_bundle, + write_skill_bundle, +) + from ..core.utils import parse_skill_content logger = logging.getLogger(__name__) @@ -56,6 +62,9 @@ ├── skills/ ← current skill library (read + write) │ └── / │ ├── SKILL.md +│ ├── references/ +│ ├── scripts/ +│ ├── assets/ │ └── history/ ├── manifest.json ← skill manifest (read-only) └── skill_registry.json @@ -65,7 +74,9 @@ - **All file operations** stay within this workspace directory. - Do NOT modify `sessions/`, `manifest.json`, or `skill_registry.json`. -- Write changes only to `skills//SKILL.md` (and `history/`). +- Write changes only inside `skills//` bundles. +- You may inspect and edit `SKILL.md`, `references/`, `scripts/`, `assets/`, + `history/`, and other supporting files that belong to a skill. - If there are no actionable patterns, make no changes — that is fine. ## Memory @@ -108,7 +119,7 @@ def reset(self) -> None: def prepare( self, sessions: list[dict], - existing_skills: dict[str, str], + existing_skills: dict[str, str | dict[str, bytes | bytearray | str]], manifest: dict[str, dict], agents_md: str, skill_registry_info: dict[str, Any] | None = None, @@ -120,7 +131,8 @@ def prepare( sessions: Raw session dicts drained from storage. existing_skills: - ``{skill_name: SKILL.md content}`` for all current skills. + ``{skill_name: bundle}`` for all current skills, where bundle is + either a raw ``SKILL.md`` string or ``{rel_path: bytes}``. manifest: Current manifest dict ``{skill_name: metadata}``. agents_md: @@ -152,10 +164,15 @@ def prepare( # Write existing skills self.skills_dir.mkdir(parents=True, exist_ok=True) + for path in sorted(self.skills_dir.iterdir()): + if path.is_dir() and path.name not in existing_skills: + shutil.rmtree(path) for name, content in existing_skills.items(): skill_dir = self.skills_dir / name - skill_dir.mkdir(parents=True, exist_ok=True) - (skill_dir / "SKILL.md").write_text(content, encoding="utf-8") + if isinstance(content, dict): + write_skill_bundle(skill_dir, content, clean=True) + else: + write_skill_bundle(skill_dir, {"SKILL.md": content}, clean=True) # Write manifest manifest_path = self.root / "manifest.json" @@ -209,17 +226,16 @@ def prepare( ) def snapshot_skills(self) -> dict[str, str]: - """Return ``{skill_name: sha256_of_content}`` for all skills in the workspace.""" + """Return ``{skill_name: tree_sha256}`` for all skills in the workspace.""" snapshot: dict[str, str] = {} if not self.skills_dir.exists(): return snapshot for skill_dir in sorted(self.skills_dir.iterdir()): if not skill_dir.is_dir(): continue - skill_md = skill_dir / "SKILL.md" - if skill_md.is_file(): - content = skill_md.read_bytes() - snapshot[skill_dir.name] = hashlib.sha256(content).hexdigest() + bundle = read_skill_bundle(skill_dir) + if "SKILL.md" in bundle: + snapshot[skill_dir.name] = bundle_tree_sha256(bundle) return snapshot def collect_changes( @@ -233,6 +249,8 @@ def collect_changes( - ``action``: ``"create"`` or ``"improve"`` - ``skill``: parsed skill dict (name, description, content, ...) - ``raw_md``: the raw SKILL.md text + - ``bundle_files``: full bundle contents ``{rel_path: bytes}`` + - ``tree_sha256``: directory-level fingerprint """ after_snapshot = self.snapshot_skills() changes: list[dict[str, Any]] = [] @@ -242,8 +260,15 @@ def collect_changes( if before_sha == after_sha: continue - skill_md_path = self.skills_dir / name / "SKILL.md" - raw_md = skill_md_path.read_text(encoding="utf-8") + bundle_files = read_skill_bundle(self.skills_dir / name) + if "SKILL.md" not in bundle_files: + logger.warning( + "[AgentWorkspace] changed skill '%s' is missing SKILL.md; skipping", + name, + ) + continue + + raw_md = bundle_entrypoint_text(bundle_files) parsed = parse_skill_content(name, raw_md) action = "create" if before_sha is None else "improve" @@ -253,6 +278,8 @@ def collect_changes( "action": action, "skill": parsed, "raw_md": raw_md, + "bundle_files": bundle_files, + "tree_sha256": after_sha, } ) logger.info( diff --git a/evolve_server/engines/workflow.py b/evolve_server/engines/workflow.py index 4a15f76..03e8598 100644 --- a/evolve_server/engines/workflow.py +++ b/evolve_server/engines/workflow.py @@ -23,6 +23,7 @@ from typing import Any, Optional from skillclaw.object_store import build_object_store +from skillclaw.skill_bundle import bundle_tree_sha256 from skillclaw.validation_store import ValidationStore from ..core.config import EvolveServerConfig @@ -47,6 +48,7 @@ load_manifest, read_json_object, save_manifest, + save_version_bundle, ) logger = logging.getLogger(__name__) @@ -178,10 +180,24 @@ def _upload_skill(self, skill: dict, action: str) -> None: skill_id = self._id_registry.get_or_create(name) md_content = build_skill_md(skill) object_key = f"{self._prefix}skills/{name}/SKILL.md" - self._bucket.put_object(object_key, md_content.encode("utf-8")) - - content_sha = hashlib.sha256(md_content.encode("utf-8")).hexdigest() - version = self._id_registry.record_update(name, content_sha, action=action) + md_bytes = md_content.encode("utf-8") + self._bucket.put_object(object_key, md_bytes) + + content_sha = hashlib.sha256(md_bytes).hexdigest() + tree_sha = bundle_tree_sha256({"SKILL.md": md_bytes}) + bundle_record = { + "format": "bundle_v1", + "entrypoint": "SKILL.md", + "tree_sha256": tree_sha, + "files": [{"path": "SKILL.md", "sha256": content_sha, "size": len(md_bytes)}], + } + version = self._id_registry.record_update( + name, + content_sha, + action=action, + bundle_record=bundle_record, + ) + save_version_bundle(self._bucket, self._prefix, name, version, {"SKILL.md": md_bytes}) manifest = self._load_remote_skills() manifest[name] = { @@ -189,6 +205,10 @@ def _upload_skill(self, skill: dict, action: str) -> None: "skill_id": skill_id, "version": version, "sha256": content_sha, + "tree_sha256": tree_sha, + "format": "bundle_v1", + "entrypoint": "SKILL.md", + "files": bundle_record["files"], "uploaded_by": "evolve_server", "uploaded_at": datetime.now(timezone.utc).isoformat(), "description": skill.get("description", ""), diff --git a/evolve_server/storage/oss_helpers.py b/evolve_server/storage/oss_helpers.py index f419905..ad32bfc 100644 --- a/evolve_server/storage/oss_helpers.py +++ b/evolve_server/storage/oss_helpers.py @@ -7,6 +7,12 @@ from typing import Any, Optional from skillclaw.object_store import build_object_store +from skillclaw.skill_bundle import ( + bundle_entrypoint_text, + bundle_file_records, + bundle_tree_sha256, + coerce_skill_bundle, +) logger = logging.getLogger(__name__) @@ -42,6 +48,22 @@ def list_session_keys(bucket, prefix: str) -> list[str]: return keys +def list_object_keys(bucket, prefix: str) -> list[str]: + """List all object keys under *prefix* across local/OSS backends.""" + if hasattr(bucket, "iter_objects"): + iterator = bucket.iter_objects(prefix=prefix) + else: + from .mock_bucket import LocalBucket, LocalObjectIterator + + if isinstance(bucket, LocalBucket): + iterator = LocalObjectIterator(bucket, prefix=prefix) + else: + import oss2 + + iterator = oss2.ObjectIterator(bucket, prefix=prefix) + return [obj.key for obj in iterator] + + def read_json_object(bucket, key: str) -> Optional[dict]: """Download and parse a single JSON object from storage.""" try: @@ -105,3 +127,137 @@ def fetch_skill_content(bucket, prefix: str, skill_name: str) -> Optional[str]: return bucket.get_object(key).read().decode("utf-8") except Exception: return None + + +def fetch_skill_bundle( + bucket, + prefix: str, + skill_name: str, + record: Optional[dict[str, Any]] = None, +) -> dict[str, bytes]: + """Download a full skill bundle from storage. + + Backward compatibility: + - bundle-aware records read nested files from ``skills//files/...`` + - legacy records fall back to a single ``SKILL.md`` + """ + bundle: dict[str, bytes] = {} + file_entries = (record or {}).get("files") + if isinstance(file_entries, list) and file_entries: + for item in file_entries: + rel_path = str((item or {}).get("path") or "").strip().replace("\\", "/") + if not rel_path: + continue + if rel_path == "SKILL.md": + key = f"{prefix}skills/{skill_name}/SKILL.md" + else: + key = f"{prefix}skills/{skill_name}/files/{rel_path}" + bundle[rel_path] = bucket.get_object(key).read() + return bundle + + content = fetch_skill_content(bucket, prefix, skill_name) + if content is None: + return {} + bundle["SKILL.md"] = content.encode("utf-8") + return bundle + + +def fetch_skill_bundle_text( + bucket, + prefix: str, + skill_name: str, + record: Optional[dict[str, Any]] = None, +) -> Optional[str]: + """Download the bundle and return its ``SKILL.md`` entrypoint text.""" + bundle = fetch_skill_bundle(bucket, prefix, skill_name, record) + if not bundle: + return None + try: + return bundle_entrypoint_text(bundle) + except Exception: + return None + + +def skill_version_prefix(prefix: str, skill_name: str, version: int) -> str: + return f"{prefix}skills/{skill_name}/versions/v{max(1, int(version or 1))}/" + + +def skill_version_bundle_key(prefix: str, skill_name: str, version: int, rel_path: str) -> str: + clean = str(rel_path or "").strip().replace("\\", "/") + base = skill_version_prefix(prefix, skill_name, version) + if clean == "SKILL.md": + return f"{base}SKILL.md" + return f"{base}files/{clean}" + + +def skill_version_record_key(prefix: str, skill_name: str, version: int) -> str: + return f"{skill_version_prefix(prefix, skill_name, version)}bundle.json" + + +def save_version_bundle( + bucket, + prefix: str, + skill_name: str, + version: int, + bundle_files: dict[str, bytes], +) -> dict[str, Any]: + bundle = coerce_skill_bundle(bundle_files) + record = { + "format": "bundle_v1", + "entrypoint": "SKILL.md", + "tree_sha256": bundle_tree_sha256(bundle), + "files": bundle_file_records(bundle), + } + keep_keys: set[str] = set() + for rel_path, data in sorted(bundle.items()): + key = skill_version_bundle_key(prefix, skill_name, version, rel_path) + keep_keys.add(key) + bucket.put_object(key, data) + for key in list_object_keys(bucket, f"{skill_version_prefix(prefix, skill_name, version)}files/"): + if key not in keep_keys: + bucket.delete_object(key) + bucket.put_object( + skill_version_record_key(prefix, skill_name, version), + json.dumps(record, ensure_ascii=False, indent=2).encode("utf-8"), + ) + return record + + +def load_version_bundle_record( + bucket, + prefix: str, + skill_name: str, + version: int, +) -> Optional[dict[str, Any]]: + try: + payload = bucket.get_object(skill_version_record_key(prefix, skill_name, version)).read().decode("utf-8") + data = json.loads(payload) + return data if isinstance(data, dict) else None + except Exception: + return None + + +def fetch_version_bundle( + bucket, + prefix: str, + skill_name: str, + version: int, + record: Optional[dict[str, Any]] = None, +) -> dict[str, bytes]: + bundle: dict[str, bytes] = {} + version_record = record or load_version_bundle_record(bucket, prefix, skill_name, version) or {} + file_entries = version_record.get("files") + if isinstance(file_entries, list) and file_entries: + for item in file_entries: + rel_path = str((item or {}).get("path") or "").strip().replace("\\", "/") + if not rel_path: + continue + key = skill_version_bundle_key(prefix, skill_name, version, rel_path) + bundle[rel_path] = bucket.get_object(key).read() + return bundle + + try: + bundle["SKILL.md"] = bucket.get_object(skill_version_bundle_key(prefix, skill_name, version, "SKILL.md")).read() + except Exception: + return {} + return bundle diff --git a/skillclaw/api_server.py b/skillclaw/api_server.py index 0a2ecc4..7855574 100644 --- a/skillclaw/api_server.py +++ b/skillclaw/api_server.py @@ -129,7 +129,10 @@ def _normalize_assistant_content_parts(content: list[dict]) -> tuple[str, list[d _HERMES_SKILL_WRITE_TOOL_NAMES = {"skill_manage"} _SHELL_TOOL_NAMES = {"shell", "exec", "bash", "terminal"} _PATCH_PATH_RE = re.compile(r"^\*\*\* (?:Add|Update|Delete) File: (.+)$", re.MULTILINE) -_SHELL_SKILL_PATH_RE = re.compile(r"([~./A-Za-z0-9_\-][^\n\"'`]*?SKILL\.md)") +_SHELL_SKILL_PATH_RE = re.compile( + r"([~./A-Za-z0-9_\-][^\n\"'`]*?" + r"(?:SKILL\.md|references/[^\s\"'`]+|scripts/[^\s\"'`]+|assets/[^\s\"'`]+|history/[^\s\"'`]+))" +) def _extract_skill_names(items: list[Any] | None) -> set[str]: @@ -242,12 +245,24 @@ def _deduplicate_paths(paths: list[str]) -> list[str]: return out +def _looks_like_path(value: str) -> bool: + text = str(value or "").strip() + if not text or text in {".", ".."}: + return False + return ( + "/" in text + or "\\" in text + or text.startswith("~") + or text.endswith("SKILL.md") + ) + + def _extract_skill_paths_from_patch(raw_text: str) -> list[str]: return _deduplicate_paths( [ match.group(1).strip() for match in _PATCH_PATH_RE.finditer(str(raw_text or "")) - if match.group(1).strip().endswith("SKILL.md") + if match.group(1).strip() ] ) @@ -257,7 +272,7 @@ def _extract_skill_paths_from_shell(command: str) -> list[str]: [ match.group(1).strip() for match in _SHELL_SKILL_PATH_RE.finditer(str(command or "")) - if match.group(1).strip().endswith("SKILL.md") + if match.group(1).strip() ] ) @@ -278,13 +293,13 @@ def _extract_skill_paths_from_args_dict(args: dict[str, Any]) -> list[str]: "new_path", ): value = args.get(key) - if isinstance(value, str) and value.strip().endswith("SKILL.md"): + if isinstance(value, str) and _looks_like_path(value): paths.append(value.strip()) raw_paths = args.get("paths") if isinstance(raw_paths, list): for item in raw_paths: - if isinstance(item, str) and item.strip().endswith("SKILL.md"): + if isinstance(item, str) and _looks_like_path(item): paths.append(item.strip()) return _deduplicate_paths(paths) @@ -320,8 +335,8 @@ def _extract_skill_paths_from_tool_call(tool_call: dict) -> tuple[str, list[str] return tool_name, _deduplicate_paths(paths) -def _extract_hermes_skill_name_from_tool_call(tool_call: dict) -> tuple[str, str]: - """Extract Hermes-native skill names from skill_view / skill_manage calls.""" +def _extract_hermes_skill_name_from_tool_call(tool_call: dict) -> tuple[str, str, str]: + """Extract Hermes-native skill name + relative file path from skill calls.""" func = tool_call.get("function", {}) if isinstance(tool_call, dict) else {} tool_name = _normalize_tool_call_name(func.get("name") or "") args_raw = func.get("arguments", "{}") @@ -337,13 +352,19 @@ def _extract_hermes_skill_name_from_tool_call(tool_call: dict) -> tuple[str, str args_obj = {} if not isinstance(args_obj, dict): - return tool_name, "" + return tool_name, "", "" + rel_path = "" + for key in ("file_path", "path"): + value = args_obj.get(key) + if isinstance(value, str) and value.strip(): + rel_path = value.strip() + break for key in ("skill_name", "name", "skill"): value = args_obj.get(key) if isinstance(value, str) and value.strip(): - return tool_name, value.strip() - return tool_name, "" + return tool_name, value.strip(), rel_path + return tool_name, "", rel_path def _resolve_skill_reference( @@ -363,7 +384,7 @@ def _resolve_skill_reference( } return { "skill_id": "", - "skill_name": os.path.basename(os.path.dirname(expanded or str(path or "").strip())), + "skill_name": "", "path": str(path or "").strip(), } @@ -371,10 +392,24 @@ def _resolve_skill_reference( def _resolve_skill_reference_by_name( skill_name: str, skill_path_map: dict[str, dict[str, str]], + rel_path: str = "", ) -> dict[str, str]: clean_name = str(skill_name or "").strip() if not clean_name: return {"skill_id": "", "skill_name": "", "path": ""} + normalized_rel = str(rel_path or "").strip().replace("\\", "/").lstrip("./") + if normalized_rel: + suffix = f"/{normalized_rel}" + for path, skill_info in skill_path_map.items(): + if str(skill_info.get("skill_name", "") or "").strip() != clean_name: + continue + candidate = str(path or "").replace("\\", "/") + if candidate.endswith(suffix) or candidate == normalized_rel: + return { + "skill_id": str(skill_info.get("skill_id", "") or ""), + "skill_name": clean_name, + "path": str(path or ""), + } for path, skill_info in skill_path_map.items(): if str(skill_info.get("skill_name", "") or "").strip() == clean_name: return { @@ -825,10 +860,10 @@ def _extract_read_skills_from_tool_calls( tool_calls: list[dict], skill_path_map: dict[str, dict[str, str]], ) -> list[dict]: - """Identify which SKILL.md files were read from the model's tool_calls. + """Identify which skill bundle files were read from the model's tool_calls. Returns a list of ``{"skill_id": ..., "skill_name": ...}`` dicts for - each ``read`` tool call whose ``path`` argument points to a SKILL.md. + each ``read`` tool call whose ``path`` argument points inside a skill. """ read_skills: list[dict] = [] seen_ids: set[str] = set() @@ -836,8 +871,8 @@ def _extract_read_skills_from_tool_calls( tool_name, skill_paths = _extract_skill_paths_from_tool_call(tc) normalized = tool_name.lower() if normalized in _HERMES_SKILL_READ_TOOL_NAMES: - _, skill_name = _extract_hermes_skill_name_from_tool_call(tc) - skill_ref = _resolve_skill_reference_by_name(skill_name, skill_path_map) + _, skill_name, rel_path = _extract_hermes_skill_name_from_tool_call(tc) + skill_ref = _resolve_skill_reference_by_name(skill_name, skill_path_map, rel_path) dedupe_key = skill_ref.get("skill_id") or skill_ref.get("skill_name") if dedupe_key and dedupe_key not in seen_ids: read_skills.append(skill_ref) @@ -847,6 +882,8 @@ def _extract_read_skills_from_tool_calls( continue for path in skill_paths: skill_ref = _resolve_skill_reference(path, skill_path_map) + if not skill_ref.get("skill_id") and not skill_ref.get("skill_name"): + continue dedupe_key = skill_ref.get("skill_id") or skill_ref.get("path") or skill_ref.get("skill_name") if not dedupe_key or dedupe_key in seen_ids: continue @@ -860,7 +897,7 @@ def _extract_modified_skills_from_tool_calls( tool_calls: list[dict], skill_path_map: dict[str, dict[str, str]], ) -> list[dict]: - """Identify SKILL.md files the model attempted to write or update.""" + """Identify skill bundle files the model attempted to write or update.""" modified_skills: list[dict] = [] seen_ids: set[str] = set() for tc in tool_calls: @@ -869,8 +906,8 @@ def _extract_modified_skills_from_tool_calls( if normalized in _READ_TOOL_NAMES: continue if normalized in _HERMES_SKILL_WRITE_TOOL_NAMES: - _, skill_name = _extract_hermes_skill_name_from_tool_call(tc) - skill_ref = _resolve_skill_reference_by_name(skill_name, skill_path_map) + _, skill_name, rel_path = _extract_hermes_skill_name_from_tool_call(tc) + skill_ref = _resolve_skill_reference_by_name(skill_name, skill_path_map, rel_path) dedupe_key = skill_ref.get("skill_id") or skill_ref.get("skill_name") if dedupe_key and dedupe_key not in seen_ids: modified_skills.append({**skill_ref, "action": normalized}) @@ -880,6 +917,8 @@ def _extract_modified_skills_from_tool_calls( continue for path in skill_paths: skill_ref = _resolve_skill_reference(path, skill_path_map) + if not skill_ref.get("skill_id") and not skill_ref.get("skill_name"): + continue dedupe_key = skill_ref.get("skill_id") or skill_ref.get("path") or skill_ref.get("skill_name") if not dedupe_key or dedupe_key in seen_ids: continue diff --git a/skillclaw/dashboard_assets/app.js b/skillclaw/dashboard_assets/app.js index 508a9ff..7d227e3 100644 --- a/skillclaw/dashboard_assets/app.js +++ b/skillclaw/dashboard_assets/app.js @@ -526,6 +526,9 @@ function compareLocalAndRemote(skill) { tone: "neutral", } } + if (skill.local_tree_sha && skill.remote_tree_sha && skill.local_tree_sha === skill.remote_tree_sha) { + return { key: "synced", label: l("已与共享正式版同步", "Synced with Shared Official Version"), tone: "published" } + } if (skill.local_sha && skill.remote_sha && skill.local_sha === skill.remote_sha) { return { key: "synced", label: l("已与共享正式版同步", "Synced with Shared Official Version"), tone: "published" } } diff --git a/skillclaw/dashboard_ingest.py b/skillclaw/dashboard_ingest.py index 87bd479..3831f12 100644 --- a/skillclaw/dashboard_ingest.py +++ b/skillclaw/dashboard_ingest.py @@ -17,6 +17,8 @@ from evolve_server.core.skill_registry import SkillIDRegistry from evolve_server.core.utils import build_skill_md +from evolve_server.storage.oss_helpers import fetch_version_bundle, load_version_bundle_record +from skillclaw.skill_bundle import bundle_entrypoint_text, read_skill_bundle_with_meta from .config import SkillClawConfig from .skill_hub import SkillHub @@ -81,11 +83,20 @@ def _hash_text(text: str) -> str: return _hash_bytes(text.encode("utf-8")) -def _compute_file_sha(path: Path) -> str: - try: - return hashlib.sha256(path.read_bytes()).hexdigest() - except OSError: - return "" +def _bundle_record( + *, + tree_sha256: str = "", + files: Any = None, + format_name: str = "bundle_v1", + entrypoint: str = "SKILL.md", +) -> dict[str, Any]: + normalized_files = [dict(item) for item in files if isinstance(item, dict)] if isinstance(files, list) else [] + return { + "format": str(format_name or "bundle_v1"), + "entrypoint": str(entrypoint or "SKILL.md"), + "tree_sha256": str(tree_sha256 or ""), + "files": normalized_files, + } def _truncate(text: str, limit: int = 180) -> str: @@ -231,7 +242,11 @@ def _load_local_skills(config: SkillClawConfig, warnings: list[str]) -> dict[str skills: dict[str, dict[str, Any]] = {} for skill_path in sorted(skills_dir.rglob("SKILL.md")): - raw = _read_text(skill_path) + bundle_files, bundle_records, local_tree_sha = read_skill_bundle_with_meta(skill_path.parent) + try: + raw = bundle_entrypoint_text(bundle_files) + except Exception: + raw = _read_text(skill_path) if not raw: warnings.append(f"failed to read local skill file: {skill_path}") continue @@ -251,7 +266,11 @@ def _load_local_skills(config: SkillClawConfig, warnings: list[str]) -> dict[str mtime = datetime.fromtimestamp(skill_path.stat().st_mtime, tz=timezone.utc).isoformat() except OSError: pass - local_sha = _compute_file_sha(skill_path) + local_sha = _hash_text(raw) + local_bundle_record = _bundle_record( + tree_sha256=local_tree_sha, + files=bundle_records, + ) skills[name] = { "name": name, @@ -275,6 +294,9 @@ def _load_local_skills(config: SkillClawConfig, warnings: list[str]) -> dict[str "current_sha": local_sha, "local_sha": local_sha, "remote_sha": "", + "current_tree_sha": local_tree_sha, + "local_tree_sha": local_tree_sha, + "remote_tree_sha": "", "local_inject_count": int(stat.get("inject_count", 0) or 0), "observed_injection_count": 0, "read_count": 0, @@ -289,6 +311,8 @@ def _load_local_skills(config: SkillClawConfig, warnings: list[str]) -> dict[str "manifest": {}, "registry": {}, "versions": [], + "local_bundle_record": local_bundle_record, + "remote_bundle_record": {}, } return skills @@ -893,19 +917,70 @@ def _load_shared_skills( history = [] enriched_history: list[dict[str, Any]] = [] current_sha = str(registry_entry.get("content_sha") or record.get("sha256") or (_hash_text(raw) if raw else "")) - current_version = int(registry_entry.get("version", 0) or 0) + current_version = int(registry_entry.get("version", 0) or record.get("version", 0) or 0) if current_version <= 0 and current_sha: current_version = 1 + remote_bundle_record = _bundle_record( + tree_sha256=str(record.get("tree_sha256") or registry_entry.get("tree_sha256") or ""), + files=record.get("files") or registry_entry.get("files") or [], + format_name=str(record.get("format") or registry_entry.get("format") or "bundle_v1"), + entrypoint=str(record.get("entrypoint") or registry_entry.get("entrypoint") or "SKILL.md"), + ) + current_tree_sha = str(remote_bundle_record.get("tree_sha256") or current_sha) history_latest = "" for item in history: if isinstance(item, dict): version_entry = dict(item) + version_num = int(version_entry.get("version", 0) or 0) + version_bundle_record = _bundle_record( + tree_sha256=str(version_entry.get("tree_sha256", "") or ""), + files=version_entry.get("files") or [], + format_name=str(version_entry.get("format") or "bundle_v1"), + entrypoint=str(version_entry.get("entrypoint") or "SKILL.md"), + ) + if (not version_bundle_record["files"] or not version_bundle_record["tree_sha256"]) and version_num > 0: + persisted_bundle_record = load_version_bundle_record(hub._bucket, hub._prefix(), name, version_num) + if isinstance(persisted_bundle_record, dict): + version_bundle_record = _bundle_record( + tree_sha256=str( + persisted_bundle_record.get("tree_sha256") + or version_bundle_record.get("tree_sha256") + or "" + ), + files=persisted_bundle_record.get("files") or version_bundle_record.get("files") or [], + format_name=str( + persisted_bundle_record.get("format") + or version_bundle_record.get("format") + or "bundle_v1" + ), + entrypoint=str( + persisted_bundle_record.get("entrypoint") + or version_bundle_record.get("entrypoint") + or "SKILL.md" + ), + ) content_sha = str(version_entry.get("content_sha", "") or "") snapshot_md = "" if content_sha: snapshot_md = candidate_docs_by_skill.get(name, {}).get(content_sha, "") if not snapshot_md and content_sha and raw and content_sha == current_sha: snapshot_md = raw + if not snapshot_md and version_num > 0 and version_bundle_record.get("files"): + try: + snapshot_bundle = fetch_version_bundle( + hub._bucket, + hub._prefix(), + name, + version_num, + version_bundle_record, + ) + except Exception: + snapshot_bundle = {} + if snapshot_bundle: + try: + snapshot_md = bundle_entrypoint_text(snapshot_bundle) + except Exception: + snapshot_md = "" if snapshot_md: parsed_snapshot = _parse_skill_document( snapshot_md, @@ -914,6 +989,12 @@ def _load_shared_skills( ) version_entry["skill_md"] = snapshot_md version_entry["content"] = str(parsed_snapshot.get("content") or "") + if version_bundle_record.get("files"): + version_entry["bundle_record"] = version_bundle_record + version_entry["tree_sha256"] = str(version_bundle_record.get("tree_sha256") or "") + version_entry["format"] = str(version_bundle_record.get("format") or "bundle_v1") + version_entry["entrypoint"] = str(version_bundle_record.get("entrypoint") or "SKILL.md") + version_entry["files"] = list(version_bundle_record.get("files") or []) enriched_history.append(version_entry) history_latest = _latest_timestamp(history_latest, str(version_entry.get("timestamp", "") or "")) @@ -945,6 +1026,9 @@ def _load_shared_skills( "current_sha": current_sha, "local_sha": "", "remote_sha": current_sha, + "current_tree_sha": current_tree_sha, + "local_tree_sha": "", + "remote_tree_sha": current_tree_sha, "local_inject_count": 0, "observed_injection_count": 0, "read_count": 0, @@ -959,6 +1043,8 @@ def _load_shared_skills( "manifest": record, "registry": registry_entry, "versions": enriched_history, + "local_bundle_record": {}, + "remote_bundle_record": remote_bundle_record, } sessions: list[dict[str, Any]] = [] @@ -1022,15 +1108,24 @@ def build_dashboard_snapshot(config: SkillClawConfig) -> dict[str, Any]: shared_skill.get("remote_updated_at", "") or shared_skill.get("updated_at", "") or "" ) current["remote_sha"] = str(shared_skill.get("remote_sha", "") or shared_skill.get("current_sha", "") or "") + current["local_tree_sha"] = str(current.get("local_tree_sha", "") or current.get("current_tree_sha", "") or "") + current["remote_tree_sha"] = str( + shared_skill.get("remote_tree_sha", "") or shared_skill.get("current_tree_sha", "") or "" + ) current["current_version"] = int( shared_skill.get("current_version", 0) or current.get("current_version", 0) or 0 ) current["current_sha"] = str(shared_skill.get("current_sha", "") or current.get("current_sha", "")) + current["current_tree_sha"] = str( + shared_skill.get("current_tree_sha", "") or current.get("current_tree_sha", "") or current["current_sha"] + ) current["manifest"] = shared_skill.get("manifest") or {} current["registry"] = shared_skill.get("registry") or {} current["versions"] = list(shared_skill.get("versions") or []) current["remote_skill_md"] = shared_skill.get("skill_md", "") current["remote_content"] = shared_skill.get("content", "") + current["remote_bundle_record"] = shared_skill.get("remote_bundle_record") or {} + current["local_bundle_record"] = current.get("local_bundle_record") or {} if not current.get("description"): current["description"] = str(shared_skill.get("description", "") or "") if not current.get("metadata"): @@ -1165,6 +1260,9 @@ def build_dashboard_snapshot(config: SkillClawConfig) -> dict[str, Any]: "current_sha": str(registry_entry.get("content_sha", "") or ""), "local_sha": "", "remote_sha": str(registry_entry.get("content_sha", "") or ""), + "current_tree_sha": str(registry_entry.get("tree_sha256", "") or ""), + "local_tree_sha": "", + "remote_tree_sha": str(registry_entry.get("tree_sha256", "") or ""), "local_inject_count": 0, "observed_injection_count": 0, "read_count": 0, @@ -1181,6 +1279,13 @@ def build_dashboard_snapshot(config: SkillClawConfig) -> dict[str, Any]: "versions": ( list(registry_entry.get("history") or []) if isinstance(registry_entry.get("history"), list) else [] ), + "local_bundle_record": {}, + "remote_bundle_record": _bundle_record( + tree_sha256=str(registry_entry.get("tree_sha256", "") or ""), + files=registry_entry.get("files") or [], + format_name=str(registry_entry.get("format") or "bundle_v1"), + entrypoint=str(registry_entry.get("entrypoint") or "SKILL.md"), + ), } skills_by_name[name] = skill @@ -1198,12 +1303,14 @@ def build_dashboard_snapshot(config: SkillClawConfig) -> dict[str, Any]: { "version": int(skill.get("current_version", 0) or 1), "content_sha": str(skill.get("current_sha", "") or ""), + "tree_sha256": str(skill.get("current_tree_sha", "") or ""), "timestamp": str( skill.get("updated_at") or skill.get("uploaded_at") or skill.get("last_injected_at") or "" ), "action": "snapshot", "skill_md": str(skill.get("remote_skill_md") or skill.get("skill_md") or ""), "content": str(skill.get("remote_content") or skill.get("content") or ""), + "bundle_record": skill.get("remote_bundle_record") or skill.get("local_bundle_record") or {}, } ] skill["versions"] = versions diff --git a/skillclaw/dashboard_server.py b/skillclaw/dashboard_server.py index e1cfe00..ab405cd 100644 --- a/skillclaw/dashboard_server.py +++ b/skillclaw/dashboard_server.py @@ -16,9 +16,12 @@ from fastapi.responses import FileResponse from fastapi.staticfiles import StaticFiles +from evolve_server.storage.oss_helpers import fetch_skill_bundle, fetch_version_bundle + from .config import SkillClawConfig from .dashboard_ingest import build_dashboard_snapshot from .dashboard_store import DashboardStore +from .skill_bundle import write_skill_bundle from .skill_hub import SkillHub logger = logging.getLogger(__name__) @@ -117,6 +120,52 @@ def sync(self) -> dict[str, Any]: "overview": self.store.get_overview(), } + def _skill_root_dir(self, skill: dict[str, Any], skill_name: str) -> Path: + local_path = Path(str(skill.get("local_path", "") or "")).expanduser() + if str(local_path).strip() and local_path.name == "SKILL.md": + return local_path.parent + return Path(self.config.skills_dir).expanduser() / skill_name + + @staticmethod + def _bundle_record(payload: Any) -> dict[str, Any]: + if not isinstance(payload, dict): + return {} + files = payload.get("files") + if not isinstance(files, list): + files = [] + return { + "format": str(payload.get("format") or "bundle_v1"), + "entrypoint": str(payload.get("entrypoint") or "SKILL.md"), + "tree_sha256": str(payload.get("tree_sha256") or ""), + "files": [dict(item) for item in files if isinstance(item, dict)], + } + + @staticmethod + def _requires_full_bundle(record: dict[str, Any]) -> bool: + files = record.get("files") + return isinstance(files, list) and len(files) > 1 + + def _write_document_version(self, skill_root: Path, document: str) -> None: + skill_root.mkdir(parents=True, exist_ok=True) + (skill_root / "SKILL.md").write_text(document.rstrip() + "\n", encoding="utf-8") + + def _activate_shared_bundle( + self, + skill_name: str, + skill_root: Path, + *, + version: int | None = None, + bundle_record: dict[str, Any], + ) -> None: + hub = _require_sharing_hub(self.config) + if version is None: + bundle_files = fetch_skill_bundle(hub._bucket, hub._prefix(), skill_name, bundle_record) + else: + bundle_files = fetch_version_bundle(hub._bucket, hub._prefix(), skill_name, version, bundle_record) + if not bundle_files: + raise ValueError("bundle snapshot is unavailable for the selected version") + write_skill_bundle(skill_root, bundle_files, clean=True) + def _embedded_evolve_server(self): from evolve_server.core.config import EvolveServerConfig from evolve_server.engines.workflow import EvolveServer @@ -266,13 +315,22 @@ def activate_skill_version(self, skill_id: str, *, target: str) -> dict[str, Any if not selected_target: raise ValueError("'target' is required") + skill_root = self._skill_root_dir(skill, skill_name) document = "" label = "" + activated_bundle = False if selected_target == "local-current": document = str(skill.get("skill_md") or skill.get("content") or "").strip() label = "本地当前版本" + self._write_document_version(skill_root, document) elif selected_target == "shared-current": - document = str(skill.get("remote_skill_md") or skill.get("remote_content") or "").strip() + bundle_record = self._bundle_record(skill.get("remote_bundle_record")) + if self._requires_full_bundle(bundle_record): + self._activate_shared_bundle(skill_name, skill_root, bundle_record=bundle_record) + activated_bundle = True + else: + document = str(skill.get("remote_skill_md") or skill.get("remote_content") or "").strip() + self._write_document_version(skill_root, document) label = "共享当前版本" elif selected_target.startswith("shared-version:"): raw_version = selected_target.split(":", 1)[1].strip() @@ -291,20 +349,30 @@ def activate_skill_version(self, skill_id: str, *, target: str) -> dict[str, Any ) if not isinstance(version_payload, dict): raise ValueError(f"shared version not found: v{version_num}") - document = str(version_payload.get("skill_md") or version_payload.get("content") or "").strip() + version_bundle_record = self._bundle_record(version_payload.get("bundle_record")) + current_bundle_record = self._bundle_record(skill.get("remote_bundle_record")) + if self._requires_full_bundle(version_bundle_record): + self._activate_shared_bundle( + skill_name, + skill_root, + version=version_num, + bundle_record=version_bundle_record, + ) + activated_bundle = True + else: + document = str(version_payload.get("skill_md") or version_payload.get("content") or "").strip() + if self._requires_full_bundle(current_bundle_record): + raise ValueError( + "selected version only has a SKILL.md snapshot; full bundle replay is unavailable" + ) + self._write_document_version(skill_root, document) label = f"共享 v{version_num}" else: raise ValueError(f"unsupported activation target: {selected_target}") - if not document: + if not activated_bundle and not document: raise ValueError("selected version does not include a document snapshot") - local_path = Path(str(skill.get("local_path", "") or "")).expanduser() - if not str(local_path).strip() or local_path.name != "SKILL.md": - local_path = Path(self.config.skills_dir).expanduser() / skill_name / "SKILL.md" - local_path.parent.mkdir(parents=True, exist_ok=True) - local_path.write_text(document.rstrip() + "\n", encoding="utf-8") - sync_result = self.sync() return { "operation": "activate-skill-version", @@ -312,7 +380,7 @@ def activate_skill_version(self, skill_id: str, *, target: str) -> dict[str, Any "skill_name": skill_name, "target": selected_target, "label": label, - "local_path": str(local_path), + "local_path": str(skill_root / "SKILL.md"), "sync": sync_result["summary"], } diff --git a/skillclaw/dashboard_store.py b/skillclaw/dashboard_store.py index 3e109c2..59b9e4f 100644 --- a/skillclaw/dashboard_store.py +++ b/skillclaw/dashboard_store.py @@ -330,6 +330,7 @@ def get_meta(self) -> dict[str, Any]: return {str(row["key"]): _json_loads(row["value"], row["value"]) for row in rows} def _skill_summary_from_row(self, row: sqlite3.Row) -> dict[str, Any]: + payload = _json_loads(row["raw_json"], {}) return { "skill_id": row["skill_id"], "name": row["name"], @@ -344,6 +345,11 @@ def _skill_summary_from_row(self, row: sqlite3.Row) -> dict[str, Any]: "updated_at": row["updated_at"], "current_version": row["current_version"], "current_sha": row["current_sha"], + "local_sha": str(payload.get("local_sha", "") or ""), + "remote_sha": str(payload.get("remote_sha", "") or ""), + "current_tree_sha": str(payload.get("current_tree_sha", "") or ""), + "local_tree_sha": str(payload.get("local_tree_sha", "") or ""), + "remote_tree_sha": str(payload.get("remote_tree_sha", "") or ""), "local_inject_count": row["local_inject_count"], "observed_injection_count": row["observed_injection_count"], "read_count": row["read_count"], diff --git a/skillclaw/skill_bundle.py b/skillclaw/skill_bundle.py new file mode 100644 index 0000000..5905c73 --- /dev/null +++ b/skillclaw/skill_bundle.py @@ -0,0 +1,176 @@ +"""Helpers for reading, hashing, and writing multi-file skill bundles.""" + +from __future__ import annotations + +import hashlib +import os +import shutil +from pathlib import Path, PurePosixPath +from typing import Iterable, Mapping + +_BUNDLE_ENTRYPOINT = "SKILL.md" +_IGNORED_NAMES = {".DS_Store"} +_IGNORED_DIR_NAMES = {".git", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache"} +_IGNORED_SUFFIXES = {".pyc", ".pyo"} + + +class SkillBundleError(ValueError): + """Raised when a bundle is malformed or a bundle path is unsafe.""" + + +def _coerce_bytes(data: bytes | bytearray | str) -> bytes: + if isinstance(data, (bytes, bytearray)): + return bytes(data) + if isinstance(data, str): + return data.encode("utf-8") + raise TypeError(f"Unsupported bundle payload type: {type(data).__name__}") + + +def normalize_bundle_rel_path(rel_path: str) -> str: + value = str(rel_path or "").strip().replace("\\", "/") + if not value: + raise SkillBundleError("Bundle path must not be empty") + parts = PurePosixPath(value).parts + if not parts: + raise SkillBundleError("Bundle path must not be empty") + if any(part in {"", ".", ".."} for part in parts): + raise SkillBundleError(f"Unsafe bundle path: {rel_path!r}") + return "/".join(parts) + + +def is_ignored_bundle_rel_path(rel_path: str) -> bool: + parts = PurePosixPath(normalize_bundle_rel_path(rel_path)).parts + if any(part in _IGNORED_DIR_NAMES for part in parts[:-1]): + return True + leaf = parts[-1] + if leaf in _IGNORED_NAMES: + return True + return any(leaf.endswith(suffix) for suffix in _IGNORED_SUFFIXES) + + +def read_skill_bundle(skill_dir: str | os.PathLike[str]) -> dict[str, bytes]: + root = Path(skill_dir) + if not root.is_dir(): + return {} + + bundle: dict[str, bytes] = {} + for rel_path in list_skill_bundle_paths(root): + path = root / Path(rel_path) + bundle[rel_path] = path.read_bytes() + return bundle + + +def list_skill_bundle_paths(skill_dir: str | os.PathLike[str]) -> list[str]: + root = Path(skill_dir) + if not root.is_dir(): + return [] + + paths: list[str] = [] + for path in sorted(root.rglob("*")): + if not path.is_file(): + continue + rel_path = path.relative_to(root).as_posix() + if is_ignored_bundle_rel_path(rel_path): + continue + paths.append(rel_path) + return paths + + +def coerce_skill_bundle(bundle_files: Mapping[str, bytes | bytearray | str]) -> dict[str, bytes]: + bundle: dict[str, bytes] = {} + for raw_rel_path, raw_data in bundle_files.items(): + rel_path = normalize_bundle_rel_path(raw_rel_path) + if is_ignored_bundle_rel_path(rel_path): + continue + bundle[rel_path] = _coerce_bytes(raw_data) + return bundle + + +def bundle_file_records(bundle_files: Mapping[str, bytes | bytearray | str]) -> list[dict[str, int | str]]: + records: list[dict[str, int | str]] = [] + for rel_path, raw_data in sorted(coerce_skill_bundle(bundle_files).items()): + data = _coerce_bytes(raw_data) + records.append({ + "path": rel_path, + "sha256": hashlib.sha256(data).hexdigest(), + "size": len(data), + }) + return records + + +def bundle_tree_sha256(bundle_files: Mapping[str, bytes | bytearray | str]) -> str: + digest = hashlib.sha256() + for record in bundle_file_records(bundle_files): + digest.update(str(record["path"]).encode("utf-8")) + digest.update(b"\0") + digest.update(str(record["sha256"]).encode("ascii")) + digest.update(b"\0") + digest.update(str(record["size"]).encode("ascii")) + digest.update(b"\n") + return digest.hexdigest() + + +def read_skill_bundle_with_meta( + skill_dir: str | os.PathLike[str], +) -> tuple[dict[str, bytes], list[dict[str, int | str]], str]: + bundle = read_skill_bundle(skill_dir) + records = bundle_file_records(bundle) + tree_sha = bundle_tree_sha256(bundle) + return bundle, records, tree_sha + + +def bundle_entrypoint_bytes( + bundle_files: Mapping[str, bytes | bytearray | str], + entrypoint: str = _BUNDLE_ENTRYPOINT, +) -> bytes: + bundle = coerce_skill_bundle(bundle_files) + key = normalize_bundle_rel_path(entrypoint) + if key not in bundle: + raise SkillBundleError(f"Skill bundle is missing required entrypoint {key}") + return bundle[key] + + +def bundle_entrypoint_text( + bundle_files: Mapping[str, bytes | bytearray | str], + entrypoint: str = _BUNDLE_ENTRYPOINT, +) -> str: + return bundle_entrypoint_bytes(bundle_files, entrypoint).decode("utf-8") + + +def write_skill_bundle( + skill_dir: str | os.PathLike[str], + bundle_files: Mapping[str, bytes | bytearray | str], + *, + clean: bool = False, +) -> None: + root = Path(skill_dir) + if clean and root.exists(): + shutil.rmtree(root) + root.mkdir(parents=True, exist_ok=True) + + for rel_path, data in sorted(coerce_skill_bundle(bundle_files).items()): + path = root / Path(rel_path) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_bytes(data) + + +def bundle_has_only_entrypoint( + bundle_files: Mapping[str, bytes | bytearray | str], + entrypoint: str = _BUNDLE_ENTRYPOINT, +) -> bool: + bundle = coerce_skill_bundle(bundle_files) + return set(bundle.keys()) == {normalize_bundle_rel_path(entrypoint)} + + +def bundle_paths(bundle_files: Mapping[str, bytes | bytearray | str] | Iterable[str]) -> list[str]: + if isinstance(bundle_files, Mapping): + paths = bundle_files.keys() + else: + paths = bundle_files + out: list[str] = [] + for rel_path in paths: + clean = normalize_bundle_rel_path(str(rel_path)) + if is_ignored_bundle_rel_path(clean): + continue + out.append(clean) + return sorted(set(out)) diff --git a/skillclaw/skill_hub.py b/skillclaw/skill_hub.py index b7fa406..50926c8 100644 --- a/skillclaw/skill_hub.py +++ b/skillclaw/skill_hub.py @@ -24,19 +24,21 @@ from datetime import datetime, timezone from typing import Any, Collection, Optional +from evolve_server.core.skill_registry import SkillIDRegistry + from .object_store import build_object_store, is_not_found_error +from .skill_bundle import ( + bundle_entrypoint_bytes, + bundle_file_records, + bundle_has_only_entrypoint, + bundle_tree_sha256, + read_skill_bundle_with_meta, + write_skill_bundle, +) logger = logging.getLogger(__name__) -def _compute_sha256(path: str) -> str: - h = hashlib.sha256() - with open(path, "rb") as f: - for chunk in iter(lambda: f.read(8192), b""): - h.update(chunk) - return h.hexdigest() - - def _is_hermes_skill_root(skills_dir: str) -> bool: return os.path.realpath(skills_dir) == os.path.realpath(os.path.join(os.path.expanduser("~"), ".hermes", "skills")) @@ -121,6 +123,96 @@ def _manifest_key(self) -> str: def _skill_key(self, skill_name: str) -> str: return f"{self._prefix()}skills/{skill_name}/SKILL.md" + def _skill_files_prefix(self, skill_name: str) -> str: + return f"{self._prefix()}skills/{skill_name}/files/" + + def _skill_bundle_key(self, skill_name: str, rel_path: str) -> str: + clean = str(rel_path or "").strip().replace("\\", "/") + if clean == "SKILL.md": + return self._skill_key(skill_name) + return f"{self._skill_files_prefix(skill_name)}{clean}" + + def _iter_remote_keys(self, prefix: str): + return self._bucket.iter_objects(prefix=prefix) + + def _delete_remote_bundle_extras(self, skill_name: str, keep_paths: Collection[str]) -> None: + keep_keys = { + self._skill_bundle_key(skill_name, rel_path) + for rel_path in keep_paths + if rel_path != "SKILL.md" + } + for obj in self._iter_remote_keys(self._skill_files_prefix(skill_name)): + key = str(getattr(obj, "key", "") or "") + if key and key not in keep_keys: + self._bucket.delete_object(key) + + def _download_skill_bundle( + self, + skill_name: str, + record: dict[str, Any], + ) -> dict[str, bytes]: + bundle: dict[str, bytes] = {} + file_entries = record.get("files") + if isinstance(file_entries, list) and file_entries: + for item in file_entries: + rel_path = str((item or {}).get("path") or "").strip().replace("\\", "/") + if not rel_path: + continue + key = self._skill_bundle_key(skill_name, rel_path) + bundle[rel_path] = self._bucket.get_object(key).read() + else: + bundle["SKILL.md"] = self._bucket.get_object(self._skill_key(skill_name)).read() + return bundle + + def _skill_version_prefix(self, skill_name: str, version: int) -> str: + return f"{self._prefix()}skills/{skill_name}/versions/v{max(1, int(version or 1))}/" + + def _skill_version_bundle_key(self, skill_name: str, version: int, rel_path: str) -> str: + clean = str(rel_path or "").strip().replace("\\", "/") + if clean == "SKILL.md": + return f"{self._skill_version_prefix(skill_name, version)}SKILL.md" + return f"{self._skill_version_prefix(skill_name, version)}files/{clean}" + + def _skill_version_record_key(self, skill_name: str, version: int) -> str: + return f"{self._skill_version_prefix(skill_name, version)}bundle.json" + + def _save_version_bundle(self, skill_name: str, version: int, bundle_files: dict[str, bytes]) -> dict[str, Any]: + record = { + "format": "bundle_v1", + "entrypoint": "SKILL.md", + "tree_sha256": bundle_tree_sha256(bundle_files), + "files": bundle_file_records(bundle_files), + } + keep_keys: set[str] = set() + for rel_path, data in sorted(bundle_files.items()): + key = self._skill_version_bundle_key(skill_name, version, rel_path) + keep_keys.add(key) + self._bucket.put_object(key, data) + for obj in self._iter_remote_keys(f"{self._skill_version_prefix(skill_name, version)}files/"): + key = str(getattr(obj, "key", "") or "") + if key and key not in keep_keys: + self._bucket.delete_object(key) + self._bucket.put_object( + self._skill_version_record_key(skill_name, version), + json.dumps(record, ensure_ascii=False, indent=2).encode("utf-8"), + ) + return record + + @staticmethod + def _local_bundle_matches_record(skill_dir: str, record: dict[str, Any]) -> bool: + bundle, _records, tree_sha = read_skill_bundle_with_meta(skill_dir) + if not bundle: + return False + if record.get("format") == "bundle_v1": + return str(record.get("tree_sha256") or "") == tree_sha + + try: + skill_md = bundle_entrypoint_bytes(bundle) + except Exception: + return False + skill_sha = hashlib.sha256(skill_md).hexdigest() + return bundle_has_only_entrypoint(bundle) and str(record.get("sha256") or "") == skill_sha + # ------------------------------------------------------------------ # # Manifest operations # # ------------------------------------------------------------------ # @@ -194,6 +286,8 @@ def push_skills( return {"uploaded": 0, "skipped": 0, "filtered": 0, "total_local": 0} manifest = self._load_remote_manifest() + registry = SkillIDRegistry() + registry.load_from_oss(self._bucket, self._prefix()) uploaded = 0 skipped = 0 filtered = 0 @@ -205,6 +299,7 @@ def push_skills( for path in paths: skill_name = os.path.basename(os.path.dirname(path)) + skill_dir = os.path.dirname(path) if use_filter and skill_name in stats: entry = stats[skill_name] @@ -221,19 +316,46 @@ def push_skills( filtered += 1 continue - local_sha = _compute_sha256(path) + bundle_files, bundle_records, tree_sha = read_skill_bundle_with_meta(skill_dir) + skill_md = bundle_entrypoint_bytes(bundle_files) + local_sha = hashlib.sha256(skill_md).hexdigest() remote_rec = manifest.get(skill_name) - if remote_rec and remote_rec.get("sha256") == local_sha: + if remote_rec and self._local_bundle_matches_record(skill_dir, remote_rec): skipped += 1 continue - with open(path, "rb") as f: - self._bucket.put_object(self._skill_key(skill_name), f) + self._bucket.put_object(self._skill_key(skill_name), skill_md) + for rel_path, data in sorted(bundle_files.items()): + if rel_path == "SKILL.md": + continue + self._bucket.put_object(self._skill_bundle_key(skill_name, rel_path), data) + self._delete_remote_bundle_extras(skill_name, bundle_files.keys()) + + bundle_record = { + "format": "bundle_v1", + "entrypoint": "SKILL.md", + "tree_sha256": tree_sha, + "files": bundle_records, + } + version = registry.record_update( + skill_name, + local_sha, + action="push", + bundle_record=bundle_record, + ) + self._save_version_bundle(skill_name, version, bundle_files) manifest[skill_name] = { + **(remote_rec or {}), "name": skill_name, + "skill_id": registry.get_or_create(skill_name), + "version": version, "sha256": local_sha, + "tree_sha256": tree_sha, + "format": "bundle_v1", + "entrypoint": "SKILL.md", + "files": bundle_records, "uploaded_by": self._user_alias, "uploaded_at": datetime.now(timezone.utc).isoformat(), } @@ -244,6 +366,7 @@ def push_skills( if uploaded > 0: self._save_remote_manifest(manifest) + registry.save_to_oss(self._bucket, self._prefix()) logger.info( "[SkillHub] push complete: %d uploaded, %d skipped, %d filtered, %d total", @@ -487,7 +610,6 @@ def _result( local_dirs_by_name, ) local_path = os.path.join(local_dir, "SKILL.md") - remote_sha = rec.get("sha256", "") if name in skip_set and os.path.exists(local_path): skipped += 1 @@ -495,23 +617,18 @@ def _result( logger.info("[SkillHub] preserved local skill during pull: %s", name) continue - if os.path.exists(local_path): - local_sha = _compute_sha256(local_path) - if local_sha == remote_sha: - skipped += 1 - self._remove_duplicate_local_skill_dirs(name, local_dir, local_dirs_by_name) - continue + if os.path.isdir(local_dir) and self._local_bundle_matches_record(local_dir, rec): + skipped += 1 + self._remove_duplicate_local_skill_dirs(name, local_dir, local_dirs_by_name) + continue try: - result = self._bucket.get_object(self._skill_key(name)) - content = result.read() + bundle = self._download_skill_bundle(name, rec) except Exception as e: logger.warning("[SkillHub] failed to download skill %s: %s", name, e) continue - os.makedirs(local_dir, exist_ok=True) - with open(local_path, "wb") as f: - f.write(content) + write_skill_bundle(local_dir, bundle, clean=True) downloaded += 1 self._remove_duplicate_local_skill_dirs(name, local_dir, local_dirs_by_name) logger.info("[SkillHub] pulled skill: %s", name) @@ -568,40 +685,25 @@ def _result( resolved_targets[name] = target_dir local_path = os.path.join(target_dir, "SKILL.md") staged_dir = os.path.join(staging_dir, os.path.relpath(target_dir, skills_dir)) - staged_path = os.path.join(staged_dir, "SKILL.md") - remote_sha = rec.get("sha256", "") - content: bytes if name in skip_set and os.path.exists(local_path): - with open(local_path, "rb") as f: - content = f.read() skipped += 1 - os.makedirs(staged_dir, exist_ok=True) - with open(staged_path, "wb") as f: - f.write(content) + if os.path.isdir(target_dir): + shutil.copytree(target_dir, staged_dir, dirs_exist_ok=True) logger.info("[SkillHub] preserved local skill during pull: %s", name) continue - if os.path.exists(local_path): - local_sha = _compute_sha256(local_path) - if local_sha == remote_sha: - with open(local_path, "rb") as f: - content = f.read() - skipped += 1 - os.makedirs(staged_dir, exist_ok=True) - with open(staged_path, "wb") as f: - f.write(content) - continue + if os.path.isdir(target_dir) and self._local_bundle_matches_record(target_dir, rec): + skipped += 1 + shutil.copytree(target_dir, staged_dir, dirs_exist_ok=True) + continue try: - result = self._bucket.get_object(self._skill_key(name)) - content = result.read() + bundle = self._download_skill_bundle(name, rec) except Exception as e: raise RuntimeError(f"failed to download skill {name}: {e}") from e - os.makedirs(staged_dir, exist_ok=True) - with open(staged_path, "wb") as f: - f.write(content) + write_skill_bundle(staged_dir, bundle, clean=True) downloaded += 1 logger.info("[SkillHub] pulled skill: %s", name) diff --git a/skillclaw/skill_manager.py b/skillclaw/skill_manager.py index 0513ead..55547a2 100644 --- a/skillclaw/skill_manager.py +++ b/skillclaw/skill_manager.py @@ -59,6 +59,8 @@ import yaml +from .skill_bundle import list_skill_bundle_paths + logger = logging.getLogger(__name__) _SAFE_NAME_RE = re.compile(r"^[a-z][a-z0-9-]{1,63}$") @@ -513,18 +515,28 @@ def get_all_skills(self) -> list[dict]: ] def get_skill_path_map(self) -> Dict[str, Dict[str, str]]: - """Return a mapping from file_path → {skill_id, skill_name}. + """Return a mapping from bundle file path → {skill_id, skill_name}. Used by the server to resolve which skill a ``read`` tool call targets. """ path_map: Dict[str, Dict[str, str]] = {} for s in self.get_all_skills(): - for fp in [s.get("file_path", ""), self._public_skill_path(s)]: - if fp: - path_map[fp] = { - "skill_id": s.get("id", ""), - "skill_name": s.get("name", ""), - } + skill_dir = os.path.dirname(str(s.get("file_path", "") or "")) + bundle_paths = list_skill_bundle_paths(skill_dir) if skill_dir else [] + bundle_paths = bundle_paths or ["SKILL.md"] + public_dir = os.path.dirname(self._public_skill_path(s)) if self._public_skill_path(s) else "" + for rel_path in bundle_paths: + locations = [] + if skill_dir: + locations.append(os.path.realpath(os.path.join(skill_dir, rel_path))) + if public_dir: + locations.append(os.path.realpath(os.path.join(public_dir, rel_path))) + for fp in locations: + if fp: + path_map[fp] = { + "skill_id": s.get("id", ""), + "skill_name": s.get("name", ""), + } return path_map def _public_skill_path(self, skill: dict) -> str: diff --git a/tests/test_dashboard.py b/tests/test_dashboard.py index a720a0a..00321e9 100644 --- a/tests/test_dashboard.py +++ b/tests/test_dashboard.py @@ -13,6 +13,7 @@ from skillclaw.dashboard_ingest import build_dashboard_snapshot from skillclaw.dashboard_server import DashboardService, create_dashboard_app from skillclaw.dashboard_store import DashboardStore +from skillclaw.skill_bundle import bundle_file_records, bundle_tree_sha256 def _sha256_text(value: str) -> str: @@ -39,8 +40,15 @@ def _skill_doc(name: str, description: str, body: str, *, category: str = "gener ) -def _history_entry(version: int, document: str, timestamp: str, action: str) -> dict[str, object]: - return { +def _history_entry( + version: int, + document: str, + timestamp: str, + action: str, + *, + bundle_record: dict[str, object] | None = None, +) -> dict[str, object]: + payload: dict[str, object] = { "version": version, "content_sha": _sha256_text(document), "timestamp": timestamp, @@ -48,6 +56,29 @@ def _history_entry(version: int, document: str, timestamp: str, action: str) -> "skill_md": document, "content": document, } + if isinstance(bundle_record, dict): + payload.update(bundle_record) + return payload + + +def _bundle_record(bundle_files: dict[str, bytes]) -> dict[str, object]: + return { + "format": "bundle_v1", + "entrypoint": "SKILL.md", + "tree_sha256": bundle_tree_sha256(bundle_files), + "files": bundle_file_records(bundle_files), + } + + +def _write_storage_bundle(root: Path, bundle_files: dict[str, bytes]) -> None: + root.mkdir(parents=True, exist_ok=True) + for rel_path, data in bundle_files.items(): + if rel_path == "SKILL.md": + path = root / "SKILL.md" + else: + path = root / "files" / Path(rel_path) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_bytes(data) def _transcript_record(role: str, text: str) -> dict[str, object]: @@ -285,10 +316,36 @@ def _create_shared_snapshot(self) -> None: for path in (skills_dir, sessions_dir, validation_jobs_dir, validation_results_dir, validation_decisions_dir): path.mkdir(parents=True, exist_ok=True) + debug_v2_doc = _skill_doc( + "debug-notes", + "Keep a compact running log while debugging.", + "Capture the failing assumption and note what changed after each retry.", + category="coding", + ) + debug_v2_bundle = { + "SKILL.md": debug_v2_doc.encode("utf-8"), + "references/guide.md": b"v2 debug guide\n", + } + debug_v3_bundle = { + "SKILL.md": self.shared_docs["debug-notes"].encode("utf-8"), + "references/checklist.md": b"v3 debug checklist\n", + } + for name, document in self.shared_docs.items(): skill_dir = skills_dir / name skill_dir.mkdir(parents=True, exist_ok=True) (skill_dir / "SKILL.md").write_text(document, encoding="utf-8") + _write_storage_bundle(skills_dir / "debug-notes", debug_v3_bundle) + _write_storage_bundle(skills_dir / "debug-notes" / "versions" / "v2", debug_v2_bundle) + _write_storage_bundle(skills_dir / "debug-notes" / "versions" / "v3", debug_v3_bundle) + (skills_dir / "debug-notes" / "versions" / "v2" / "bundle.json").write_text( + json.dumps(_bundle_record(debug_v2_bundle), indent=2), + encoding="utf-8", + ) + (skills_dir / "debug-notes" / "versions" / "v3" / "bundle.json").write_text( + json.dumps(_bundle_record(debug_v3_bundle), indent=2), + encoding="utf-8", + ) manifest = [ { @@ -296,6 +353,8 @@ def _create_shared_snapshot(self) -> None: "description": "Keep a compact running log while debugging.", "category": "coding", "sha256": _sha256_text(self.shared_docs["debug-notes"]), + "version": 3, + **_bundle_record(debug_v3_bundle), "uploaded_by": "alice", "uploaded_at": "2026-04-20T09:30:00Z", }, @@ -364,17 +423,20 @@ def _create_shared_snapshot(self) -> None: ), _history_entry( 2, - _skill_doc( - "debug-notes", - "Keep a compact running log while debugging.", - "Capture the failing assumption and note what changed after each retry.", - category="coding", - ), + debug_v2_doc, "2026-04-19T08:15:00Z", "improve", + bundle_record=_bundle_record(debug_v2_bundle), + ), + _history_entry( + 3, + self.shared_docs["debug-notes"], + "2026-04-20T09:30:00Z", + "improve", + bundle_record=_bundle_record(debug_v3_bundle), ), - _history_entry(3, self.shared_docs["debug-notes"], "2026-04-20T09:30:00Z", "improve"), ], + **_bundle_record(debug_v3_bundle), }, "incident-timeline": { "skill_id": _skill_id("incident-timeline"), @@ -1339,6 +1401,22 @@ def test_dashboard_api_and_ui(self) -> None: debug_detail = detail_resp.json() self.assertEqual(debug_detail["name"], "debug-notes") debug_v2 = next(item for item in debug_detail["versions"] if int(item["version"]) == 2) + debug_v3 = next(item for item in debug_detail["versions"] if int(item["version"]) == 3) + + activate_current_resp = client.post( + f"/api/v1/skills/{debug_skill['skill_id']}/activate", + json={"target": "shared-current"}, + ) + self.assertEqual(activate_current_resp.status_code, 200) + local_debug_dir = self.fixture.skills_dir / "debug-notes" + self.assertEqual( + (local_debug_dir / "SKILL.md").read_text(encoding="utf-8").strip(), + debug_v3["skill_md"].strip(), + ) + self.assertTrue((local_debug_dir / "references" / "checklist.md").is_file()) + self.assertFalse((local_debug_dir / "references" / "guide.md").exists()) + synced_detail = client.get(f"/api/v1/skills/{debug_skill['skill_id']}").json() + self.assertEqual(synced_detail["local_tree_sha"], synced_detail["remote_tree_sha"]) activate_resp = client.post( f"/api/v1/skills/{debug_skill['skill_id']}/activate", @@ -1346,8 +1424,10 @@ def test_dashboard_api_and_ui(self) -> None: ) self.assertEqual(activate_resp.status_code, 200) self.assertEqual(activate_resp.json()["target"], "shared-version:2") - local_debug_path = self.fixture.skills_dir / "debug-notes" / "SKILL.md" + local_debug_path = local_debug_dir / "SKILL.md" self.assertEqual(local_debug_path.read_text(encoding="utf-8").strip(), debug_v2["skill_md"].strip()) + self.assertTrue((local_debug_dir / "references" / "guide.md").is_file()) + self.assertFalse((local_debug_dir / "references" / "checklist.md").exists()) sessions_resp = client.get("/api/v1/sessions") self.assertEqual(sessions_resp.status_code, 200) diff --git a/tests/test_skill_bundle_support.py b/tests/test_skill_bundle_support.py new file mode 100644 index 0000000..69baa2c --- /dev/null +++ b/tests/test_skill_bundle_support.py @@ -0,0 +1,377 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from evolve_server.engines.agent_workspace import AgentWorkspace +from skillclaw.skill_hub import SkillHub + +SKILL_MD = """--- +name: demo-skill +description: Demo bundle skill +category: general +--- + +# Demo Skill + +Use the bundled resources. +""" + + +def _skill_md(name: str) -> str: + return f"""--- +name: {name} +description: Demo bundle skill +category: general +--- + +# Demo Skill + +Use the bundled resources. +""" + + +def _write_bytes(path: Path, data: bytes) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_bytes(data) + + +def test_skill_hub_push_pull_roundtrips_bundle(tmp_path: Path) -> None: + skills_dir = tmp_path / "skills" + _write_bytes(skills_dir / "demo-skill" / "SKILL.md", SKILL_MD.encode("utf-8")) + _write_bytes(skills_dir / "demo-skill" / "references" / "guide.md", b"hello bundle\n") + _write_bytes(skills_dir / "demo-skill" / "scripts" / "tool.py", b"print('ok')\n") + _write_bytes(skills_dir / "demo-skill" / "assets" / "icon.bin", b"\x00\x01\x02\x03") + + bucket_root = tmp_path / "bucket" + hub = SkillHub( + backend="local", + endpoint="", + bucket="", + access_key_id="", + secret_access_key="", + local_root=str(bucket_root), + group_id="team-a", + user_alias="tester", + ) + + push_result = hub.push_skills(str(skills_dir)) + + assert push_result["uploaded"] == 1 + manifest = hub._load_remote_manifest() + rec = manifest["demo-skill"] + assert rec["format"] == "bundle_v1" + assert rec["entrypoint"] == "SKILL.md" + assert rec["tree_sha256"] + assert {item["path"] for item in rec["files"]} == { + "SKILL.md", + "references/guide.md", + "scripts/tool.py", + "assets/icon.bin", + } + + restored_dir = tmp_path / "restored-skills" + pull_result = hub.pull_skills(str(restored_dir)) + + assert pull_result["downloaded"] == 1 + assert (restored_dir / "demo-skill" / "SKILL.md").read_text(encoding="utf-8") == SKILL_MD + assert (restored_dir / "demo-skill" / "references" / "guide.md").read_bytes() == b"hello bundle\n" + assert (restored_dir / "demo-skill" / "scripts" / "tool.py").read_bytes() == b"print('ok')\n" + assert (restored_dir / "demo-skill" / "assets" / "icon.bin").read_bytes() == b"\x00\x01\x02\x03" + + +def test_skill_hub_push_pull_roundtrips_single_file_skill(tmp_path: Path) -> None: + skills_dir = tmp_path / "skills" + solo_md = _skill_md("solo-skill") + _write_bytes(skills_dir / "solo-skill" / "SKILL.md", solo_md.encode("utf-8")) + + bucket_root = tmp_path / "bucket" + hub = SkillHub( + backend="local", + endpoint="", + bucket="", + access_key_id="", + secret_access_key="", + local_root=str(bucket_root), + group_id="team-a", + user_alias="tester", + ) + + push_result = hub.push_skills(str(skills_dir)) + + assert push_result["uploaded"] == 1 + manifest = hub._load_remote_manifest() + rec = manifest["solo-skill"] + assert rec["format"] == "bundle_v1" + assert rec["entrypoint"] == "SKILL.md" + assert rec["files"] == [{ + "path": "SKILL.md", + "sha256": rec["sha256"], + "size": len(solo_md.encode("utf-8")), + }] + + restored_dir = tmp_path / "restored-skills" + pull_result = hub.pull_skills(str(restored_dir)) + + assert pull_result["downloaded"] == 1 + assert (restored_dir / "solo-skill" / "SKILL.md").read_text(encoding="utf-8") == solo_md + restored_files = sorted( + p.relative_to(restored_dir / "solo-skill").as_posix() + for p in (restored_dir / "solo-skill").rglob("*") + if p.is_file() + ) + assert restored_files == ["SKILL.md"] + + +def test_skill_hub_persists_bundle_version_snapshots(tmp_path: Path) -> None: + skills_dir = tmp_path / "skills" + skill_dir = skills_dir / "demo-skill" + _write_bytes(skill_dir / "SKILL.md", SKILL_MD.encode("utf-8")) + _write_bytes(skill_dir / "references" / "guide.md", b"v1 guide\n") + + bucket_root = tmp_path / "bucket" + hub = SkillHub( + backend="local", + endpoint="", + bucket="", + access_key_id="", + secret_access_key="", + local_root=str(bucket_root), + group_id="team-a", + user_alias="tester", + ) + + first_push = hub.push_skills(str(skills_dir)) + assert first_push["uploaded"] == 1 + + _write_bytes(skill_dir / "SKILL.md", _skill_md("demo-skill").encode("utf-8")) + _write_bytes(skill_dir / "references" / "guide.md", b"v2 guide\n") + second_push = hub.push_skills(str(skills_dir)) + assert second_push["uploaded"] == 1 + + registry_path = bucket_root / "team-a" / "evolve_skill_registry.json" + registry = json.loads(registry_path.read_text(encoding="utf-8")) + entry = registry["demo-skill"] + assert entry["version"] == 2 + assert [item["version"] for item in entry["history"]] == [1, 2] + assert all(item.get("tree_sha256") for item in entry["history"]) + assert all( + any(file_item["path"] == "references/guide.md" for file_item in item.get("files", [])) + for item in entry["history"] + ) + + v1_root = bucket_root / "team-a" / "skills" / "demo-skill" / "versions" / "v1" + v2_root = bucket_root / "team-a" / "skills" / "demo-skill" / "versions" / "v2" + assert (v1_root / "SKILL.md").read_text(encoding="utf-8") == SKILL_MD + assert (v1_root / "files" / "references" / "guide.md").read_bytes() == b"v1 guide\n" + assert (v2_root / "SKILL.md").read_text(encoding="utf-8") == _skill_md("demo-skill") + assert (v2_root / "files" / "references" / "guide.md").read_bytes() == b"v2 guide\n" + + +def test_skill_hub_roundtrips_extra_unstructured_files_and_attributes_them(tmp_path: Path) -> None: + import json + + from skillclaw.api_server import _extract_read_skills_from_tool_calls + from skillclaw.skill_manager import SkillManager + + skills_dir = tmp_path / "skills" + extra_md = _skill_md("extra-skill") + _write_bytes(skills_dir / "extra-skill" / "SKILL.md", extra_md.encode("utf-8")) + _write_bytes(skills_dir / "extra-skill" / "notes" / "checklist.txt", b"bundle extras stay intact\n") + _write_bytes(skills_dir / "extra-skill" / "workspace" / "payload.bin", b"\x10\x20\x30\x40") + + bucket_root = tmp_path / "bucket" + hub = SkillHub( + backend="local", + endpoint="", + bucket="", + access_key_id="", + secret_access_key="", + local_root=str(bucket_root), + group_id="team-a", + user_alias="tester", + ) + + push_result = hub.push_skills(str(skills_dir)) + assert push_result["uploaded"] == 1 + + manifest = hub._load_remote_manifest() + rec = manifest["extra-skill"] + assert {item["path"] for item in rec["files"]} == { + "SKILL.md", + "notes/checklist.txt", + "workspace/payload.bin", + } + + restored_dir = tmp_path / "restored-skills" + pull_result = hub.pull_skills(str(restored_dir)) + assert pull_result["downloaded"] == 1 + assert (restored_dir / "extra-skill" / "notes" / "checklist.txt").read_bytes() == b"bundle extras stay intact\n" + assert (restored_dir / "extra-skill" / "workspace" / "payload.bin").read_bytes() == b"\x10\x20\x30\x40" + + manager = SkillManager(str(restored_dir)) + skill_path_map = manager.get_skill_path_map() + extra_path = str((restored_dir / "extra-skill" / "notes" / "checklist.txt").resolve()) + + read_calls = [ + { + "id": "call_skill_view_extra", + "function": { + "name": "skill_view", + "arguments": json.dumps({"name": "extra-skill", "file_path": "notes/checklist.txt"}), + }, + } + ] + + read_skills = _extract_read_skills_from_tool_calls(read_calls, skill_path_map) + + assert read_skills == [{ + "skill_id": manager.get_all_skills()[0]["id"], + "skill_name": "extra-skill", + "path": extra_path, + }] + + +def test_agent_workspace_detects_nested_bundle_changes(tmp_path: Path) -> None: + workspace = AgentWorkspace(tmp_path / "workspace") + workspace.prepare( + sessions=[], + existing_skills={ + "demo-skill": { + "SKILL.md": SKILL_MD.encode("utf-8"), + "references/guide.md": b"first\n", + } + }, + manifest={"demo-skill": {"name": "demo-skill"}}, + agents_md="# evolve", + skill_registry_info=None, + ) + + before = workspace.snapshot_skills() + (workspace.skills_dir / "demo-skill" / "references" / "guide.md").write_text( + "second\n", encoding="utf-8" + ) + + changes = workspace.collect_changes(before) + + assert len(changes) == 1 + change = changes[0] + assert change["name"] == "demo-skill" + assert change["action"] == "improve" + assert change["raw_md"] == SKILL_MD + assert change["bundle_files"]["SKILL.md"] == SKILL_MD.encode("utf-8") + assert change["bundle_files"]["references/guide.md"] == b"second\n" + assert change["tree_sha256"] + + +def test_skill_path_map_and_tool_attribution_include_bundle_files(tmp_path: Path) -> None: + import json + + from skillclaw.api_server import ( + _extract_modified_skills_from_tool_calls, + _extract_read_skills_from_tool_calls, + ) + from skillclaw.skill_manager import SkillManager + + skills_dir = tmp_path / "skills" + _write_bytes(skills_dir / "demo-skill" / "SKILL.md", SKILL_MD.encode("utf-8")) + _write_bytes(skills_dir / "demo-skill" / "references" / "guide.md", b"look here\n") + _write_bytes(skills_dir / "demo-skill" / "scripts" / "tool.py", b"print('ok')\n") + + manager = SkillManager(str(skills_dir)) + skill_path_map = manager.get_skill_path_map() + + reference_path = str((skills_dir / "demo-skill" / "references" / "guide.md").resolve()) + script_path = str((skills_dir / "demo-skill" / "scripts" / "tool.py").resolve()) + assert reference_path in skill_path_map + assert script_path in skill_path_map + + read_calls = [ + { + "id": "call_read_1", + "function": {"name": "read", "arguments": json.dumps({"path": reference_path})}, + } + ] + write_calls = [ + { + "id": "call_edit_1", + "function": {"name": "edit_file", "arguments": json.dumps({"path": script_path})}, + } + ] + + read_skills = _extract_read_skills_from_tool_calls(read_calls, skill_path_map) + modified_skills = _extract_modified_skills_from_tool_calls(write_calls, skill_path_map) + + assert read_skills == [{ + "skill_id": manager.get_all_skills()[0]["id"], + "skill_name": "demo-skill", + "path": reference_path, + }] + assert modified_skills == [{ + "skill_id": manager.get_all_skills()[0]["id"], + "skill_name": "demo-skill", + "path": script_path, + "action": "edit_file", + }] + + +def test_hermes_skill_tool_attribution_uses_bundle_child_paths(tmp_path: Path) -> None: + import json + + from skillclaw.api_server import ( + _extract_modified_skills_from_tool_calls, + _extract_read_skills_from_tool_calls, + ) + from skillclaw.skill_manager import SkillManager + + skills_dir = tmp_path / "skills" + _write_bytes(skills_dir / "demo-skill" / "SKILL.md", SKILL_MD.encode("utf-8")) + _write_bytes(skills_dir / "demo-skill" / "references" / "guide.md", b"look here\n") + _write_bytes(skills_dir / "demo-skill" / "scripts" / "tool.py", b"print('ok')\n") + + manager = SkillManager(str(skills_dir)) + skill_path_map = manager.get_skill_path_map() + skill_id = manager.get_all_skills()[0]["id"] + + reference_path = str((skills_dir / "demo-skill" / "references" / "guide.md").resolve()) + script_path = str((skills_dir / "demo-skill" / "scripts" / "tool.py").resolve()) + + read_calls = [ + { + "id": "call_skill_view_1", + "function": { + "name": "skill_view", + "arguments": json.dumps({"name": "demo-skill", "file_path": "references/guide.md"}), + }, + } + ] + write_calls = [ + { + "id": "call_skill_manage_1", + "function": { + "name": "skill_manage", + "arguments": json.dumps( + { + "action": "write_file", + "name": "demo-skill", + "file_path": "scripts/tool.py", + } + ), + }, + } + ] + + read_skills = _extract_read_skills_from_tool_calls(read_calls, skill_path_map) + modified_skills = _extract_modified_skills_from_tool_calls(write_calls, skill_path_map) + + assert read_skills == [{ + "skill_id": skill_id, + "skill_name": "demo-skill", + "path": reference_path, + }] + assert modified_skills == [{ + "skill_id": skill_id, + "skill_name": "demo-skill", + "path": script_path, + "action": "skill_manage", + }]