diff --git a/emitterc.go b/emitterc.go
index 354f5c6..c6b6ba1 100644
--- a/emitterc.go
+++ b/emitterc.go
@@ -25,6 +25,7 @@ package yaml
 import (
 	"bytes"
 	"fmt"
+	"unicode/utf8"
 )
 
 // Flush the buffer if needed.
@@ -1583,7 +1584,13 @@ func yaml_emitter_write_tag_content(emitter *yaml_emitter_t, value []byte, allow
 				return false
 			}
 		} else {
-			w := width(value[i])
+			r, w := utf8.DecodeRune(value[i:])
+			// DecodeRune reports malformed input as (RuneError, 1). A width of 3
+			// is a well-formed U+FFFD, which is valid UTF-8 and must not be rejected.
+			if r == utf8.RuneError && w == 1 {
+				yaml_emitter_set_emitter_error(emitter, fmt.Sprintf("invalid UTF-8 in tag %+q", value))
+				return false
+			}
 			for k := 0; k < w; k++ {
 				octet := value[i]
 				i++
diff --git a/fuzz_test.go b/fuzz_test.go
index d2bf150..06bc2a2 100644
--- a/fuzz_test.go
+++ b/fuzz_test.go
@@ -158,3 +158,75 @@ func FuzzDecodeEncodeDecode(f *testing.F) {
 		compareNodes(&v, &v2)
 	})
 }
+
+// FuzzTag marshals a scalar node carrying an arbitrary tag. Marshal must either
+// fail with an "invalid UTF-8 in tag" error or produce output that unmarshals
+// back to a node with the same long tag and value.
+func FuzzTag(f *testing.F) {
+	for _, tag := range []string{
+		// Special characters.
+		"\x00",
+		"\x20", // Space
+		"\t",
+		"\r",
+		"\n",
+
+		// Invalid UTF-8.
+		"\xff",
+		"\xca",
+		"\xf0\x9f\xa4",
+
+		// Tag characters.
+		"!",
+		"!!",
+
+		// Comment characters.
+		"#",
+		"#\n#",
+
+		// Flow collection characters.
+		"[]",
+		"{}",
+		",",
+
+		// Just text.
+		"a",
+		"foo",
+
+		// Unicode.
+		"тег",
+		"🤷",
+		"\uFFFD", // Well-formed replacement character, not an encoding error.
+
+		// Existing tags.
+		"str",
+		"tag:yaml.org,2002:str",
+	} {
+		f.Add(tag)
+	}
+
+	f.Fuzz(func(t *testing.T, tag string) {
+		n := &yaml.Node{
+			Kind:  yaml.ScalarNode,
+			Tag:   tag,
+			Value: "foo",
+		}
+
+		data, err := yaml.Marshal(n)
+		if err != nil {
+			// The only marshal failure tolerated here is the invalid UTF-8 tag error.
+			require.ErrorContains(t, err, "invalid UTF-8 in tag")
+			return
+		}
+
+		var n2 *yaml.Node
+		require.NoError(t, yaml.Unmarshal(data, &n2))
+		if n2.Kind == yaml.DocumentNode {
+			// Step past the document node to its first child before comparing.
+			n2 = n2.Content[0]
+		}
+
+		require.Equal(t, n.LongTag(), n2.LongTag())
+		require.Equal(t, n.Value, n2.Value)
+	})
+}