Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 66 additions & 3 deletions src/skillport/modules/skills/internal/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import re
import unicodedata
from pathlib import Path

Expand All @@ -13,6 +14,13 @@
DESCRIPTION_MAX_LENGTH = 1024
COMPATIBILITY_MAX_LENGTH = 500

# Reserved words that cannot appear in skill names
RESERVED_WORDS: frozenset[str] = frozenset({"anthropic", "claude"})

# Pattern to detect XML-like tags (e.g., <tag>, </tag>, <tag attr="x"/>)
# Tags must start with a letter or "/" (for closing tags)
_XML_TAG_PATTERN = re.compile(r"</?[a-zA-Z][^>]*>")


def _is_valid_name_char(char: str) -> bool:
"""Check if a character is valid for skill names (lowercase letter, digit, or hyphen)."""
Expand All @@ -29,6 +37,20 @@ def _validate_name_chars(name: str) -> bool:
return all(_is_valid_name_char(c) for c in normalized)


def _contains_reserved_word(name: str) -> str | None:
"""Return the first reserved word found in name, or None."""
name_lower = name.lower()
for word in RESERVED_WORDS:
if word in name_lower:
return word
Comment on lines +41 to +45
Copy link

Copilot AI Dec 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reserved word detection uses substring matching, which will produce false positives. For example, a skill named "anthropology-101" would be incorrectly rejected because it contains "anthropic" as a substring. Consider using word boundary matching instead, such as using regex with \b word boundaries or splitting on hyphens and checking each component.

Suggested change
"""Return the first reserved word found in name, or None."""
name_lower = name.lower()
for word in RESERVED_WORDS:
if word in name_lower:
return word
"""Return the first reserved word found in name, or None.
Reserved words are matched against hyphen-separated components of the name
(rather than arbitrary substrings) to avoid false positives such as
"anthropology-101" containing "anthropic" as a substring.
"""
normalized_lower = unicodedata.normalize("NFKC", name).lower()
for part in normalized_lower.split("-"):
if part and part in RESERVED_WORDS:
return part

Copilot uses AI. Check for mistakes.
return None


def _contains_xml_tags(text: str) -> bool:
"""Check if text contains XML-like tags."""
return bool(_XML_TAG_PATTERN.search(text))


# Allowed top-level frontmatter properties
ALLOWED_FRONTMATTER_KEYS: set[str] = {
"name",
Expand Down Expand Up @@ -64,7 +86,7 @@ def validate_skill_record(
path = skill.get("path", "")
dir_name = Path(path).name if path else ""

# A1/A2: Key existence checks (only when meta is provided)
# A1/A2: Key existence and type checks (only when meta is provided)
if meta is not None:
if "name" not in meta:
issues.append(
Expand All @@ -74,6 +96,14 @@ def validate_skill_record(
field="name",
)
)
elif not isinstance(meta["name"], str):
issues.append(
ValidationIssue(
severity="fatal",
message=f"frontmatter.name: must be a string (got {type(meta['name']).__name__})",
field="name",
)
)
if "description" not in meta:
issues.append(
ValidationIssue(
Expand All @@ -82,6 +112,14 @@ def validate_skill_record(
field="description",
)
)
elif not isinstance(meta["description"], str):
issues.append(
ValidationIssue(
severity="fatal",
message=f"frontmatter.description: must be a string (got {type(meta['description']).__name__})",
field="description",
)
)

# Required fields (value checks)
if not name:
Expand Down Expand Up @@ -116,7 +154,7 @@ def validate_skill_record(
)
)

if name:
if name and isinstance(name, str):
if len(name) > NAME_MAX_LENGTH:
issues.append(
ValidationIssue(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Type validation skipped when meta is absent

The new string-type enforcement is only applied in the meta branch, and the main validation block now runs only when isinstance(name, str). Calls without meta (e.g., validate_skill and skillport validate --id, which pass index records with no _meta and often no path) will treat non-string fields like name=True or description=['desc'] as valid because they bypass this guarded block, so the intended "must be a string" rule is never raised in those flows. This lets YAML type-coerced data pass validation whenever meta is unavailable.

Useful? React with 👍 / 👎.

Expand Down Expand Up @@ -149,8 +187,25 @@ def validate_skill_record(
field="name",
)
)
reserved = _contains_reserved_word(name)
if reserved:
issues.append(
ValidationIssue(
severity="fatal",
message=f"frontmatter.name: cannot contain reserved word '{reserved}'",
field="name",
)
)
if _contains_xml_tags(name):
issues.append(
ValidationIssue(
severity="fatal",
message="frontmatter.name: cannot contain XML tags",
field="name",
)
)

if description:
if description and isinstance(description, str):
if len(description) > DESCRIPTION_MAX_LENGTH:
issues.append(
ValidationIssue(
Expand All @@ -159,6 +214,14 @@ def validate_skill_record(
field="description",
)
)
if _contains_xml_tags(description):
issues.append(
ValidationIssue(
severity="fatal",
message="frontmatter.description: cannot contain XML tags",
Comment on lines +223 to +227

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Guard XML-tag check for non-string descriptions

If a SKILL.md frontmatter uses a non-string description (e.g., YAML list such as description: [item1, item2]), validate_skill_record now appends the new "must be a string" fatal but then immediately calls _contains_xml_tags(description) on lines 217-221, which invokes re.search on a list and raises TypeError: expected string or bytes-like object. The validation command (skillport validate path loader passes the raw meta as description) will crash before returning issues, so the new type-check rule never surfaces and the CLI aborts on this input.

Useful? React with 👍 / 👎.

field="description",
)
)

# Check for unexpected frontmatter keys and compatibility (requires reading SKILL.md)
if path:
Expand Down
105 changes: 105 additions & 0 deletions tests/unit/test_validation_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
COMPATIBILITY_MAX_LENGTH,
DESCRIPTION_MAX_LENGTH,
NAME_MAX_LENGTH,
RESERVED_WORDS,
SKILL_LINE_THRESHOLD,
validate_skill_record,
)
Expand Down Expand Up @@ -135,6 +136,60 @@ def test_name_valid_hyphens(self):
hyphen_issues = [i for i in issues if "hyphen" in i.message.lower()]
assert len(hyphen_issues) == 0

@pytest.mark.parametrize("reserved", list(RESERVED_WORDS))
def test_name_reserved_word(self, reserved: str):
"""name containing reserved word → fatal."""
name = f"my-{reserved}-skill"
issues = validate_skill_record(
{"name": name, "description": "desc", "path": f"/skills/{name}"}
)
fatal = [i for i in issues if i.severity == "fatal" and "reserved" in i.message.lower()]
assert len(fatal) == 1
assert reserved in fatal[0].message

def test_name_reserved_word_case_insensitive(self):
"""Reserved word check should be case-insensitive."""
# Note: name validation already fails on uppercase, but reserved check is independent
issues = validate_skill_record(
{"name": "my-skill", "description": "desc", "path": "/skills/my-skill"}
)
reserved_issues = [i for i in issues if "reserved" in i.message.lower()]
assert len(reserved_issues) == 0
Comment on lines +150 to +157
Copy link

Copilot AI Dec 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test doesn't actually verify case-insensitive reserved word detection. The name "my-skill" contains no reserved words (neither "anthropic" nor "claude"), so this test would pass even if the case-insensitive logic was broken. To properly test case insensitivity, use a name like "my-Anthropic-skill" or "my-CLAUDE-tool" and verify it triggers the reserved word check.

Copilot uses AI. Check for mistakes.

def test_name_xml_tags(self):
"""name containing XML tags → fatal."""
issues = validate_skill_record(
{"name": "<script>", "description": "desc", "path": "/skills/<script>"}
)
fatal = [i for i in issues if i.severity == "fatal" and "xml" in i.message.lower()]
assert len(fatal) == 1

def test_description_xml_tags(self):
"""description containing XML tags → fatal."""
issues = validate_skill_record(
{
"name": "my-skill",
"description": "A skill with <script>alert('xss')</script> injection",
"path": "/skills/my-skill",
}
)
fatal = [i for i in issues if i.severity == "fatal" and "xml" in i.message.lower()]
assert len(fatal) == 1
assert "description" in fatal[0].field

def test_description_no_xml_tags_ok(self):
"""description without XML tags → ok."""
issues = validate_skill_record(
{
"name": "my-skill",
"description": "A valid description with math like 3 < 5 or x > y",
"path": "/skills/my-skill",
}
)
xml_issues = [i for i in issues if "xml" in i.message.lower()]
# "<" and ">" used separately (not forming tags) should pass
assert len(xml_issues) == 0


class TestValidationWarning:
"""Warning validation rules (exit code 0)."""
Expand Down Expand Up @@ -405,6 +460,56 @@ def test_meta_none_skips_key_check(self):
key_missing = [i for i in issues if "key is missing" in i.message]
assert len(key_missing) == 0

def test_name_not_string_in_frontmatter(self):
"""meta with non-string 'name' → fatal (e.g., name: yes → True)."""
issues = validate_skill_record(
{"name": "", "description": "desc", "path": "/skills/test"},
meta={"name": True, "description": "desc"}, # YAML: name: yes
)
fatal = [
i for i in issues if i.severity == "fatal" and "must be a string" in i.message
]
assert len(fatal) == 1
assert "name" in fatal[0].field
assert "bool" in fatal[0].message

def test_description_not_string_in_frontmatter(self):
"""meta with non-string 'description' → fatal."""
issues = validate_skill_record(
{"name": "test", "description": "", "path": "/skills/test"},
meta={"name": "test", "description": ["item1", "item2"]}, # YAML list
)
fatal = [
i for i in issues if i.severity == "fatal" and "must be a string" in i.message
]
assert len(fatal) == 1
assert "description" in fatal[0].field
assert "list" in fatal[0].message

def test_non_string_description_no_crash(self):
"""Non-string description in skill dict should not crash on XML check."""
# This tests the case where skill dict has non-string values
# (e.g., from index or malformed input)
issues = validate_skill_record(
{"name": "test", "description": ["item1", "item2"], "path": "/skills/test"},
meta={"name": "test", "description": ["item1", "item2"]},
)
# Should not raise TypeError, should return type error issue
fatal = [i for i in issues if i.severity == "fatal"]
assert len(fatal) >= 1
assert any("must be a string" in i.message for i in fatal)

def test_non_string_name_no_crash(self):
"""Non-string name in skill dict should not crash on validation."""
issues = validate_skill_record(
{"name": True, "description": "desc", "path": "/skills/test"},
meta={"name": True, "description": "desc"},
)
# Should not raise TypeError
fatal = [i for i in issues if i.severity == "fatal"]
assert len(fatal) >= 1
assert any("must be a string" in i.message for i in fatal)


class TestStrictMode:
"""strict mode behavior tests."""
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.