-
Notifications
You must be signed in to change notification settings - Fork 7
feat: add validation rules for reserved words, XML tags, and type checks #54
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
809be06
d36b53c
6c427b8
f4d2711
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ | |
|
|
||
| from __future__ import annotations | ||
|
|
||
| import re | ||
| import unicodedata | ||
| from pathlib import Path | ||
|
|
||
|
|
@@ -13,6 +14,13 @@ | |
| DESCRIPTION_MAX_LENGTH = 1024 | ||
| COMPATIBILITY_MAX_LENGTH = 500 | ||
|
|
||
| # Reserved words that cannot appear in skill names | ||
| RESERVED_WORDS: frozenset[str] = frozenset({"anthropic", "claude"}) | ||
|
|
||
| # Pattern to detect XML-like tags (e.g., <tag>, </tag>, <tag attr="x"/>) | ||
| # Tags must start with a letter or "/" (for closing tags) | ||
| _XML_TAG_PATTERN = re.compile(r"</?[a-zA-Z][^>]*>") | ||
|
|
||
|
|
||
| def _is_valid_name_char(char: str) -> bool: | ||
| """Check if a character is valid for skill names (lowercase letter, digit, or hyphen).""" | ||
|
|
@@ -29,6 +37,20 @@ def _validate_name_chars(name: str) -> bool: | |
| return all(_is_valid_name_char(c) for c in normalized) | ||
|
|
||
|
|
||
| def _contains_reserved_word(name: str) -> str | None: | ||
| """Return the first reserved word found in name, or None.""" | ||
| name_lower = name.lower() | ||
| for word in RESERVED_WORDS: | ||
| if word in name_lower: | ||
| return word | ||
| return None | ||
|
|
||
|
|
||
| def _contains_xml_tags(text: str) -> bool: | ||
| """Check if text contains XML-like tags.""" | ||
| return bool(_XML_TAG_PATTERN.search(text)) | ||
|
|
||
|
|
||
| # Allowed top-level frontmatter properties | ||
| ALLOWED_FRONTMATTER_KEYS: set[str] = { | ||
| "name", | ||
|
|
@@ -83,12 +105,34 @@ def validate_skill_record( | |
| ) | ||
| ) | ||
|
|
||
| # Required fields (value checks) | ||
| if not name: | ||
| # Type checks (always run on skill values) | ||
| name_is_str = isinstance(name, str) | ||
| desc_is_str = isinstance(description, str) | ||
|
|
||
| if name and not name_is_str: | ||
| issues.append( | ||
| ValidationIssue( | ||
| severity="fatal", | ||
| message=f"frontmatter.name: must be a string (got {type(name).__name__})", | ||
| field="name", | ||
| ) | ||
| ) | ||
| if description and not desc_is_str: | ||
| issues.append( | ||
| ValidationIssue( | ||
| severity="fatal", | ||
| message=f"frontmatter.description: must be a string (got {type(description).__name__})", | ||
| field="description", | ||
| ) | ||
| ) | ||
|
|
||
| # Required fields (value checks) - only check if type is valid | ||
| if not name and name_is_str: | ||
| # Empty string | ||
| issues.append( | ||
| ValidationIssue(severity="fatal", message="frontmatter.name: missing", field="name") | ||
|
||
| ) | ||
| if not description: | ||
| if not description and desc_is_str: | ||
| issues.append( | ||
| ValidationIssue( | ||
| severity="fatal", | ||
|
|
@@ -116,7 +160,7 @@ def validate_skill_record( | |
| ) | ||
| ) | ||
|
|
||
| if name: | ||
| if name and name_is_str: | ||
| if len(name) > NAME_MAX_LENGTH: | ||
| issues.append( | ||
| ValidationIssue( | ||
|
|
@@ -149,8 +193,25 @@ def validate_skill_record( | |
| field="name", | ||
| ) | ||
| ) | ||
| reserved = _contains_reserved_word(name) | ||
| if reserved: | ||
| issues.append( | ||
| ValidationIssue( | ||
| severity="fatal", | ||
| message=f"frontmatter.name: cannot contain reserved word '{reserved}'", | ||
| field="name", | ||
| ) | ||
| ) | ||
| if _contains_xml_tags(name): | ||
| issues.append( | ||
| ValidationIssue( | ||
| severity="fatal", | ||
| message="frontmatter.name: cannot contain XML tags", | ||
| field="name", | ||
| ) | ||
| ) | ||
|
|
||
| if description: | ||
| if description and desc_is_str: | ||
| if len(description) > DESCRIPTION_MAX_LENGTH: | ||
| issues.append( | ||
| ValidationIssue( | ||
|
|
@@ -159,6 +220,14 @@ def validate_skill_record( | |
| field="description", | ||
| ) | ||
| ) | ||
| if _contains_xml_tags(description): | ||
| issues.append( | ||
| ValidationIssue( | ||
| severity="fatal", | ||
| message="frontmatter.description: cannot contain XML tags", | ||
|
Comment on lines
+223
to
+227
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
If a SKILL.md frontmatter uses a non-string description (e.g., YAML list such as Useful? React with 👍 / 👎. |
||
| field="description", | ||
| ) | ||
| ) | ||
|
|
||
| # Check for unexpected frontmatter keys and compatibility (requires reading SKILL.md) | ||
| if path: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,7 @@ | |
| COMPATIBILITY_MAX_LENGTH, | ||
| DESCRIPTION_MAX_LENGTH, | ||
| NAME_MAX_LENGTH, | ||
| RESERVED_WORDS, | ||
| SKILL_LINE_THRESHOLD, | ||
| validate_skill_record, | ||
| ) | ||
|
|
@@ -135,6 +136,60 @@ def test_name_valid_hyphens(self): | |
| hyphen_issues = [i for i in issues if "hyphen" in i.message.lower()] | ||
| assert len(hyphen_issues) == 0 | ||
|
|
||
| @pytest.mark.parametrize("reserved", list(RESERVED_WORDS)) | ||
| def test_name_reserved_word(self, reserved: str): | ||
| """name containing reserved word → fatal.""" | ||
| name = f"my-{reserved}-skill" | ||
| issues = validate_skill_record( | ||
| {"name": name, "description": "desc", "path": f"/skills/{name}"} | ||
| ) | ||
| fatal = [i for i in issues if i.severity == "fatal" and "reserved" in i.message.lower()] | ||
| assert len(fatal) == 1 | ||
| assert reserved in fatal[0].message | ||
|
|
||
| def test_name_reserved_word_case_insensitive(self): | ||
| """Reserved word check should be case-insensitive.""" | ||
| # Note: name validation already fails on uppercase, but reserved check is independent | ||
| issues = validate_skill_record( | ||
| {"name": "my-skill", "description": "desc", "path": "/skills/my-skill"} | ||
| ) | ||
| reserved_issues = [i for i in issues if "reserved" in i.message.lower()] | ||
| assert len(reserved_issues) == 0 | ||
|
Comment on lines
+150
to
+157
|
||
|
|
||
| def test_name_xml_tags(self): | ||
| """name containing XML tags → fatal.""" | ||
| issues = validate_skill_record( | ||
| {"name": "<script>", "description": "desc", "path": "/skills/<script>"} | ||
| ) | ||
| fatal = [i for i in issues if i.severity == "fatal" and "xml" in i.message.lower()] | ||
| assert len(fatal) == 1 | ||
|
|
||
| def test_description_xml_tags(self): | ||
| """description containing XML tags → fatal.""" | ||
| issues = validate_skill_record( | ||
| { | ||
| "name": "my-skill", | ||
| "description": "A skill with <script>alert('xss')</script> injection", | ||
| "path": "/skills/my-skill", | ||
| } | ||
| ) | ||
| fatal = [i for i in issues if i.severity == "fatal" and "xml" in i.message.lower()] | ||
| assert len(fatal) == 1 | ||
| assert "description" in fatal[0].field | ||
|
|
||
| def test_description_no_xml_tags_ok(self): | ||
| """description without XML tags → ok.""" | ||
| issues = validate_skill_record( | ||
| { | ||
| "name": "my-skill", | ||
| "description": "A valid description with math like 3 < 5 or x > y", | ||
| "path": "/skills/my-skill", | ||
| } | ||
| ) | ||
| xml_issues = [i for i in issues if "xml" in i.message.lower()] | ||
| # "<" and ">" used separately (not forming tags) should pass | ||
| assert len(xml_issues) == 0 | ||
|
|
||
|
|
||
| class TestValidationWarning: | ||
| """Warning validation rules (exit code 0).""" | ||
|
|
@@ -405,6 +460,70 @@ def test_meta_none_skips_key_check(self): | |
| key_missing = [i for i in issues if "key is missing" in i.message] | ||
| assert len(key_missing) == 0 | ||
|
|
||
| def test_name_not_string_in_frontmatter(self): | ||
| """Non-string 'name' → fatal (e.g., name: yes → True).""" | ||
| issues = validate_skill_record( | ||
| {"name": True, "description": "desc", "path": "/skills/test"}, | ||
| meta={"name": True, "description": "desc"}, # YAML: name: yes | ||
| ) | ||
| fatal = [ | ||
| i for i in issues if i.severity == "fatal" and "must be a string" in i.message | ||
| ] | ||
| assert len(fatal) == 1 | ||
| assert "name" in fatal[0].field | ||
| assert "bool" in fatal[0].message | ||
|
|
||
| def test_description_not_string_in_frontmatter(self): | ||
| """Non-string 'description' → fatal.""" | ||
| issues = validate_skill_record( | ||
| {"name": "test", "description": ["item1", "item2"], "path": "/skills/test"}, | ||
| meta={"name": "test", "description": ["item1", "item2"]}, # YAML list | ||
| ) | ||
| fatal = [ | ||
| i for i in issues if i.severity == "fatal" and "must be a string" in i.message | ||
| ] | ||
| assert len(fatal) == 1 | ||
| assert "description" in fatal[0].field | ||
| assert "list" in fatal[0].message | ||
|
|
||
| def test_non_string_description_no_crash(self): | ||
| """Non-string description in skill dict should not crash on XML check.""" | ||
| # This tests the case where skill dict has non-string values | ||
| # (e.g., from index or malformed input) | ||
| issues = validate_skill_record( | ||
| {"name": "test", "description": ["item1", "item2"], "path": "/skills/test"}, | ||
| meta={"name": "test", "description": ["item1", "item2"]}, | ||
| ) | ||
| # Should not raise TypeError, should return type error issue | ||
| fatal = [i for i in issues if i.severity == "fatal"] | ||
| assert len(fatal) >= 1 | ||
| assert any("must be a string" in i.message for i in fatal) | ||
|
|
||
| def test_non_string_name_no_crash(self): | ||
| """Non-string name in skill dict should not crash on validation.""" | ||
| issues = validate_skill_record( | ||
| {"name": True, "description": "desc", "path": "/skills/test"}, | ||
| meta={"name": True, "description": "desc"}, | ||
| ) | ||
| # Should not raise TypeError | ||
| fatal = [i for i in issues if i.severity == "fatal"] | ||
| assert len(fatal) >= 1 | ||
| assert any("must be a string" in i.message for i in fatal) | ||
|
|
||
| def test_non_string_without_meta(self): | ||
| """Type check works even without meta (e.g., index-based validation).""" | ||
| # This is the critical case: validate via index without meta | ||
| issues = validate_skill_record( | ||
| {"name": True, "description": ["a", "b"], "path": "/skills/test"}, | ||
| meta=None, # No meta provided | ||
| ) | ||
| fatal = [i for i in issues if i.severity == "fatal"] | ||
| # Should detect both type errors | ||
| type_errors = [i for i in fatal if "must be a string" in i.message] | ||
| assert len(type_errors) == 2 | ||
| assert any("name" in i.field for i in type_errors) | ||
| assert any("description" in i.field for i in type_errors) | ||
|
|
||
|
|
||
| class TestStrictMode: | ||
| """strict mode behavior tests.""" | ||
|
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The reserved word detection uses substring matching, which will produce false positives. For example, a skill named "anthropology-101" would be incorrectly rejected because it contains "anthropic" as a substring. Consider using word boundary matching instead, such as using regex with \b word boundaries or splitting on hyphens and checking each component.