diff --git a/src/skillport/modules/skills/internal/validation.py b/src/skillport/modules/skills/internal/validation.py index 31b501c..dac8234 100644 --- a/src/skillport/modules/skills/internal/validation.py +++ b/src/skillport/modules/skills/internal/validation.py @@ -2,6 +2,7 @@ from __future__ import annotations +import re import unicodedata from pathlib import Path @@ -13,6 +14,13 @@ DESCRIPTION_MAX_LENGTH = 1024 COMPATIBILITY_MAX_LENGTH = 500 +# Reserved words that cannot appear in skill names +RESERVED_WORDS: frozenset[str] = frozenset({"anthropic", "claude"}) + +# Pattern to detect XML-like tags (e.g., , , ) +# Tags must start with a letter or "/" (for closing tags) +_XML_TAG_PATTERN = re.compile(r"]*>") + def _is_valid_name_char(char: str) -> bool: """Check if a character is valid for skill names (lowercase letter, digit, or hyphen).""" @@ -29,6 +37,20 @@ def _validate_name_chars(name: str) -> bool: return all(_is_valid_name_char(c) for c in normalized) +def _contains_reserved_word(name: str) -> str | None: + """Return the first reserved word found in name, or None.""" + name_lower = name.lower() + for word in RESERVED_WORDS: + if word in name_lower: + return word + return None + + +def _contains_xml_tags(text: str) -> bool: + """Check if text contains XML-like tags.""" + return bool(_XML_TAG_PATTERN.search(text)) + + # Allowed top-level frontmatter properties ALLOWED_FRONTMATTER_KEYS: set[str] = { "name", @@ -83,12 +105,34 @@ def validate_skill_record( ) ) - # Required fields (value checks) - if not name: + # Type and required field checks + name_is_str = isinstance(name, str) + desc_is_str = isinstance(description, str) + + # name: must be non-empty string + if not name_is_str: + issues.append( + ValidationIssue( + severity="fatal", + message=f"frontmatter.name: must be a string (got {type(name).__name__})", + field="name", + ) + ) + elif not name: issues.append( ValidationIssue(severity="fatal", message="frontmatter.name: missing", field="name") ) - if not description: + + # description: must be non-empty string + if not desc_is_str: + issues.append( + ValidationIssue( + severity="fatal", + message=f"frontmatter.description: must be a string (got {type(description).__name__})", + field="description", + ) + ) + elif not description: issues.append( ValidationIssue( severity="fatal", @@ -116,7 +160,7 @@ def validate_skill_record( ) ) - if name: + if name and name_is_str: if len(name) > NAME_MAX_LENGTH: issues.append( ValidationIssue( @@ -149,8 +193,25 @@ def validate_skill_record( field="name", ) ) + reserved = _contains_reserved_word(name) + if reserved: + issues.append( + ValidationIssue( + severity="fatal", + message=f"frontmatter.name: cannot contain reserved word '{reserved}'", + field="name", + ) + ) + if _contains_xml_tags(name): + issues.append( + ValidationIssue( + severity="fatal", + message="frontmatter.name: cannot contain XML tags", + field="name", + ) + ) - if description: + if description and desc_is_str: if len(description) > DESCRIPTION_MAX_LENGTH: issues.append( ValidationIssue( @@ -159,6 +220,14 @@ def validate_skill_record( field="description", ) ) + if _contains_xml_tags(description): + issues.append( + ValidationIssue( + severity="fatal", + message="frontmatter.description: cannot contain XML tags", + field="description", + ) + ) # Check for unexpected frontmatter keys and compatibility (requires reading SKILL.md) if path: diff --git a/tests/unit/test_validation_rules.py b/tests/unit/test_validation_rules.py index 7de2021..e2e45f7 100644 --- a/tests/unit/test_validation_rules.py +++ b/tests/unit/test_validation_rules.py @@ -7,6 +7,7 @@ COMPATIBILITY_MAX_LENGTH, DESCRIPTION_MAX_LENGTH, NAME_MAX_LENGTH, + RESERVED_WORDS, SKILL_LINE_THRESHOLD, validate_skill_record, ) @@ -135,6 +136,60 @@ def test_name_valid_hyphens(self): hyphen_issues = [i for i in issues if "hyphen" in i.message.lower()] assert len(hyphen_issues) == 0 + @pytest.mark.parametrize("reserved", list(RESERVED_WORDS)) + def test_name_reserved_word(self, reserved: str): + """name containing reserved word → fatal.""" + name = f"my-{reserved}-skill" + issues = validate_skill_record( + {"name": name, "description": "desc", "path": f"/skills/{name}"} + ) + fatal = [i for i in issues if i.severity == "fatal" and "reserved" in i.message.lower()] + assert len(fatal) == 1 + assert reserved in fatal[0].message + + def test_name_reserved_word_case_insensitive(self): + """Reserved word check should be case-insensitive.""" + # Note: name validation already fails on uppercase, but reserved check is independent + issues = validate_skill_record( + {"name": "my-skill", "description": "desc", "path": "/skills/my-skill"} + ) + reserved_issues = [i for i in issues if "reserved" in i.message.lower()] + assert len(reserved_issues) == 0 + + def test_name_xml_tags(self): + """name containing XML tags → fatal.""" + issues = validate_skill_record( + {"name": " injection", + "path": "/skills/my-skill", + } + ) + fatal = [i for i in issues if i.severity == "fatal" and "xml" in i.message.lower()] + assert len(fatal) == 1 + assert "description" in fatal[0].field + + def test_description_no_xml_tags_ok(self): + """description without XML tags → ok.""" + issues = validate_skill_record( + { + "name": "my-skill", + "description": "A valid description with math like 3 < 5 or x > y", + "path": "/skills/my-skill", + } + ) + xml_issues = [i for i in issues if "xml" in i.message.lower()] + # "<" and ">" used separately (not forming tags) should pass + assert len(xml_issues) == 0 + class TestValidationWarning: """Warning validation rules (exit code 0).""" @@ -405,6 +460,90 @@ def test_meta_none_skips_key_check(self): key_missing = [i for i in issues if "key is missing" in i.message] assert len(key_missing) == 0 + def test_name_not_string_in_frontmatter(self): + """Non-string 'name' → fatal (e.g., name: yes → True).""" + issues = validate_skill_record( + {"name": True, "description": "desc", "path": "/skills/test"}, + meta={"name": True, "description": "desc"}, # YAML: name: yes + ) + fatal = [ + i for i in issues if i.severity == "fatal" and "must be a string" in i.message + ] + assert len(fatal) == 1 + assert "name" in fatal[0].field + assert "bool" in fatal[0].message + + def test_description_not_string_in_frontmatter(self): + """Non-string 'description' → fatal.""" + issues = validate_skill_record( + {"name": "test", "description": ["item1", "item2"], "path": "/skills/test"}, + meta={"name": "test", "description": ["item1", "item2"]}, # YAML list + ) + fatal = [ + i for i in issues if i.severity == "fatal" and "must be a string" in i.message + ] + assert len(fatal) == 1 + assert "description" in fatal[0].field + assert "list" in fatal[0].message + + def test_non_string_description_no_crash(self): + """Non-string description in skill dict should not crash on XML check.""" + # This tests the case where skill dict has non-string values + # (e.g., from index or malformed input) + issues = validate_skill_record( + {"name": "test", "description": ["item1", "item2"], "path": "/skills/test"}, + meta={"name": "test", "description": ["item1", "item2"]}, + ) + # Should not raise TypeError, should return type error issue + fatal = [i for i in issues if i.severity == "fatal"] + assert len(fatal) >= 1 + assert any("must be a string" in i.message for i in fatal) + + def test_non_string_name_no_crash(self): + """Non-string name in skill dict should not crash on validation.""" + issues = validate_skill_record( + {"name": True, "description": "desc", "path": "/skills/test"}, + meta={"name": True, "description": "desc"}, + ) + # Should not raise TypeError + fatal = [i for i in issues if i.severity == "fatal"] + assert len(fatal) >= 1 + assert any("must be a string" in i.message for i in fatal) + + def test_non_string_without_meta(self): + """Type check works even without meta (e.g., index-based validation).""" + # This is the critical case: validate via index without meta + issues = validate_skill_record( + {"name": True, "description": ["a", "b"], "path": "/skills/test"}, + meta=None, # No meta provided + ) + fatal = [i for i in issues if i.severity == "fatal"] + # Should detect both type errors + type_errors = [i for i in fatal if "must be a string" in i.message] + assert len(type_errors) == 2 + assert any("name" in i.field for i in type_errors) + assert any("description" in i.field for i in type_errors) + + def test_falsy_non_string_name_detected(self): + """Falsy non-string name (null, [], False) should be detected as type error.""" + for falsy_value in [None, [], False, 0]: + issues = validate_skill_record( + {"name": falsy_value, "description": "desc", "path": "/skills/test"}, + ) + fatal = [i for i in issues if i.severity == "fatal" and i.field == "name"] + assert len(fatal) >= 1, f"Failed for name={falsy_value!r}" + assert any("must be a string" in i.message for i in fatal), f"Failed for name={falsy_value!r}" + + def test_falsy_non_string_description_detected(self): + """Falsy non-string description (null, [], False) should be detected as type error.""" + for falsy_value in [None, [], False, 0]: + issues = validate_skill_record( + {"name": "test", "description": falsy_value, "path": "/skills/test"}, + ) + fatal = [i for i in issues if i.severity == "fatal" and i.field == "description"] + assert len(fatal) >= 1, f"Failed for description={falsy_value!r}" + assert any("must be a string" in i.message for i in fatal), f"Failed for description={falsy_value!r}" + class TestStrictMode: """strict mode behavior tests.""" diff --git a/uv.lock b/uv.lock index 8bd7f8f..64e2052 100644 --- a/uv.lock +++ b/uv.lock @@ -1832,7 +1832,7 @@ wheels = [ [[package]] name = "skillport" -version = "0.5.2" +version = "0.6.1" source = { editable = "." } dependencies = [ { name = "fastmcp" },