diff --git a/src/java/org/jivesoftware/multiplexer/net/XMLLightweightParser.java b/src/java/org/jivesoftware/multiplexer/net/XMLLightweightParser.java index c1b1b56..e8865de 100644 --- a/src/java/org/jivesoftware/multiplexer/net/XMLLightweightParser.java +++ b/src/java/org/jivesoftware/multiplexer/net/XMLLightweightParser.java @@ -167,6 +167,11 @@ char[] buf = charBuffer.array(); int readByte = charBuffer.remaining(); + // Just return if nothing was read + if (readByte == 0) { + return; + } + // Verify if the last received byte is an incomplete double byte character char lastChar = buf[readByte-1]; if (lastChar >= 0xfff0) { @@ -196,8 +201,26 @@ } // Robot. char ch; + boolean isHighSurrogate = false; for (int i = 0; i < readByte; i++) { ch = buf[i]; + if (isHighSurrogate) { + if (Character.isLowSurrogate(ch)) { + // Everything is fine. Clean up traces for surrogates + isHighSurrogate = false; + } + else { + // Trigger error. Found high surrogate not followed by low surrogate + throw new Exception("Found high surrogate not followed by low surrogate"); + } + } + else if (Character.isHighSurrogate(ch)) { + isHighSurrogate = true; + } + else if (Character.isLowSurrogate(ch)) { + // Trigger error. Found low surrogate char without a preceding high surrogate + throw new Exception("Found low surrogate char without a preceding high surrogate"); + } if (status == XMLLightweightParser.TAIL) { // Looking for the close tag if (depth < 1 && ch == head.charAt(tailCount)) { diff --git a/test/org/jivesoftware/multiplexer/net/XMLLightweightParserTest.java b/test/org/jivesoftware/multiplexer/net/XMLLightweightParserTest.java index ab91fd7..91aa6a3 100644 --- a/test/org/jivesoftware/multiplexer/net/XMLLightweightParserTest.java +++ b/test/org/jivesoftware/multiplexer/net/XMLLightweightParserTest.java @@ -11,8 +11,8 @@ package org.jivesoftware.multiplexer.net; -import junit.framework.TestCase; import junit.framework.Assert; +import junit.framework.TestCase; import org.apache.mina.common.ByteBuffer; import org.dom4j.Element; import org.dom4j.io.SAXReader; @@ -298,6 +298,32 @@ } } + public void testInvalidSurrogates() throws Exception { + byte[] one = ("").getBytes(); + byte[] two = {(byte) 0xed, (byte) 0xb3, (byte) 0xb1}; + byte[] three = "".getBytes(); + + byte[] message = new byte[one.length + two.length + three.length]; + int j = 0; + for (byte b : one) { + message[j++] = b; + } + for (byte b : two) { + message[j++] = b; + } + for (byte b : three) { + message[j++] = b; + } + + ByteBuffer mybuffer = ByteBuffer.wrap(message); + try { + parser.read(mybuffer); + fail("Failed to detect a low surrogate char without a preceding high surrogate"); + } catch (Exception e) { + assertEquals("Incorrect exception was received", "Found low surrogate char without a preceding high surrogate", e.getMessage()); + } + } + public void testRead() { try { XMLLightweightParser parser = new XMLLightweightParser("UTF-8");