Skip to content

Commit

Permalink
Write 4-byte characters (surrogate pairs) instead of escapes
Browse files Browse the repository at this point in the history
  • Loading branch information
rnetuka committed Sep 12, 2024
1 parent 89b2381 commit 8a197e7
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 2 deletions.
2 changes: 2 additions & 0 deletions src/main/java/com/fasterxml/jackson/core/JsonGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ public enum Feature {
*/
WRITE_BIGDECIMAL_AS_PLAIN(false),

/**
 * Feature that determines how UTF-16 surrogate pairs (characters outside
 * the Basic Multilingual Plane) are written by the UTF-8 backed generator:
 * when enabled, a surrogate pair found in String content is written as a
 * single 4-byte UTF-8 character instead of being written as escapes.
 *<p>
 * Feature is disabled by default.
 */
WRITE_UTF8_SURROGATES(false),

// // Schema/Validity support features

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ public enum StreamWriteFeature
*/
WRITE_BIGDECIMAL_AS_PLAIN(JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN),

/**
 * Feature that determines whether UTF-16 surrogate pairs are written as
 * 4-byte UTF-8 characters instead of escapes.
 * Mapped to {@link JsonGenerator.Feature#WRITE_UTF8_SURROGATES}, which
 * is disabled by default.
 */
WRITE_UTF8_SURROGATES(JsonGenerator.Feature.WRITE_UTF8_SURROGATES),

// // Schema/Validity support features

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.io.*;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;

import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.io.CharTypes;
Expand Down Expand Up @@ -659,6 +660,10 @@ public void writeUTF8String(byte[] text, int offset, int len) throws IOException
_outputBuffer[_outputTail++] = _quoteChar;
}

/**
 * Tells whether given UTF-16 code unit is a high (leading) surrogate, that is,
 * whether it can be the first half of a surrogate pair (U+D800..U+DBFF).
 *<p>
 * Note: a plain mask test such as {@code (ch & 0xD800) == 0xD800} is NOT
 * sufficient: it also accepts low surrogates (U+DC00..U+DFFF) and every
 * character in range U+F800..U+FFFF (for example fullwidth forms and U+FFFD),
 * none of which start a surrogate pair.
 *
 * @param ch UTF-16 code unit to check
 *
 * @return {@code true} if (and only if) {@code ch} is a high surrogate
 */
private boolean isSurrogatePair(char ch) {
    return Character.isHighSurrogate(ch);
}

/*
/**********************************************************
/* Output method implementations, unprocessed ("raw")
Expand Down Expand Up @@ -1489,6 +1494,8 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
final byte[] outputBuffer = _outputBuffer;
final int[] escCodes = _outputEscapes;

boolean writeSurrogates = Feature.WRITE_UTF8_SURROGATES.enabledIn(_features);

while (offset < end) {
int ch = cbuf[offset++];
if (ch <= 0x7F) {
Expand All @@ -1510,7 +1517,14 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
} else {
outputPtr = _outputMultiByteChar(ch, outputPtr);
// multibyte character
if (writeSurrogates && isSurrogatePair((char) ch) && offset < end) {
char highSurrogate = (char) ch;
char lowSurrogate = cbuf[offset++];
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
} else {
outputPtr = _outputMultiByteChar(ch, outputPtr);
}
}
}
_outputTail = outputPtr;
Expand All @@ -1527,6 +1541,8 @@ private final void _writeStringSegment2(final String text, int offset, final int
final byte[] outputBuffer = _outputBuffer;
final int[] escCodes = _outputEscapes;

boolean writeSurrogates = Feature.WRITE_UTF8_SURROGATES.enabledIn(_features);

while (offset < end) {
int ch = text.charAt(offset++);
if (ch <= 0x7F) {
Expand All @@ -1548,7 +1564,14 @@ private final void _writeStringSegment2(final String text, int offset, final int
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
} else {
outputPtr = _outputMultiByteChar(ch, outputPtr);
// multibyte character
if (writeSurrogates && isSurrogatePair((char) ch) && offset < end) {
char highSurrogate = (char) ch;
char lowSurrogate = text.charAt(offset++);
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
} else {
outputPtr = _outputMultiByteChar(ch, outputPtr);
}
}
}
_outputTail = outputPtr;
Expand Down Expand Up @@ -2133,6 +2156,13 @@ protected final void _outputSurrogates(int surr1, int surr2) throws IOException
bbuf[_outputTail++] = (byte) (0x80 | (c & 0x3f));
}

/**
 * Writes the UTF-8 encoding of given pair of UTF-16 code units into
 * {@code _outputBuffer}, starting at {@code outputPtr}.
 *<p>
 * A well-formed pair (high surrogate followed by low surrogate) is combined
 * into its code point and encoded directly as 4 bytes, without allocating
 * temporary objects. Any other combination is encoded via
 * {@link String#getBytes(java.nio.charset.Charset)}, which keeps ordinary
 * characters intact and substitutes the charset's replacement bytes for
 * unpaired surrogates.
 *<p>
 * NOTE(review): no bounds check is done here; caller is assumed to have
 * reserved enough room in the output buffer -- confirm against callers.
 *
 * @param highSurrogate first code unit of the pair
 * @param lowSurrogate second code unit of the pair
 * @param outputPtr offset in {@code _outputBuffer} to start writing at
 *
 * @return offset in {@code _outputBuffer} right after the last byte written
 */
private int _outputSurrogatePair(char highSurrogate, char lowSurrogate, int outputPtr) {
    if (Character.isSurrogatePair(highSurrogate, lowSurrogate)) {
        // Supplementary code point (U+10000..U+10FFFF): always 4 bytes in UTF-8
        final int codePoint = Character.toCodePoint(highSurrogate, lowSurrogate);
        final byte[] bbuf = _outputBuffer;
        bbuf[outputPtr++] = (byte) (0xf0 | (codePoint >> 18));
        bbuf[outputPtr++] = (byte) (0x80 | ((codePoint >> 12) & 0x3f));
        bbuf[outputPtr++] = (byte) (0x80 | ((codePoint >> 6) & 0x3f));
        bbuf[outputPtr++] = (byte) (0x80 | (codePoint & 0x3f));
        return outputPtr;
    }
    // Not a valid pair: let the JDK encoder deal with it (slow path, rare)
    final String s = String.valueOf(highSurrogate) + lowSurrogate;
    final byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
    System.arraycopy(bytes, 0, _outputBuffer, outputPtr, bytes.length);
    return outputPtr + bytes.length;
}

/**
*
* @param ch
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ void configDefaults() throws IOException

assertFalse(g.isEnabled(JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN));
assertFalse(g.isEnabled(StreamWriteFeature.WRITE_BIGDECIMAL_AS_PLAIN));
assertFalse(g.isEnabled(StreamWriteFeature.WRITE_UTF8_SURROGATES));

assertTrue(g.canOmitFields());
assertFalse(g.canWriteBinaryNatively());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import java.util.Random;

import static com.fasterxml.jackson.core.JsonGenerator.Feature;
import static org.junit.jupiter.api.Assertions.*;

import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -86,6 +87,17 @@ void longerRandomMultiChunk() throws Exception
}
}

// Verifies that with WRITE_UTF8_SURROGATES enabled, a String containing a
// surrogate pair is written as-is (quoted, not escaped) by the UTF-8 generator.
@Test
public void testWritingSurrogatePairs() throws IOException {
    final String string = "システム\uD867\uDE3D"; // システム𩸽
    final ByteArrayOutputStream stream = new ByteArrayOutputStream();
    // try-with-resources so the generator is closed even if writing fails
    try (JsonGenerator generator = FACTORY.createGenerator(stream, JsonEncoding.UTF8)
            .enable(Feature.WRITE_UTF8_SURROGATES)) {
        generator.writeString(string);
        generator.flush();
    }
    // Decode explicitly as UTF-8: no-arg toString() uses the platform default
    // charset, which would make the comparison fail on non-UTF-8 platforms
    assertEquals("\"" + string + "\"", stream.toString("UTF-8"));
}

/*
/**********************************************************
/* Internal methods
Expand Down

0 comments on commit 8a197e7

Please sign in to comment.