diff --git a/src/java/org/jivesoftware/multiplexer/net/MXParser.java b/src/java/org/jivesoftware/multiplexer/net/MXParser.java index 6c66adb..9e8153b 100644 --- a/src/java/org/jivesoftware/multiplexer/net/MXParser.java +++ b/src/java/org/jivesoftware/multiplexer/net/MXParser.java @@ -44,8 +44,8 @@ private long lastHeartbeat = 0; @Override - protected int nextImpl() - throws XmlPullParserException, IOException + protected int nextImpl() + throws XmlPullParserException, IOException { text = null; pcEnd = pcStart = 0; @@ -182,7 +182,7 @@ // } } else { throw new XmlPullParserException( - "unexpected character in markup "+printable(ch), this, null); + "unexpected character in markup "+printable(ch), this, null); } } else if(ch == '?') { parsePI(); @@ -202,7 +202,7 @@ return eventType = parseStartTag(); } else { throw new XmlPullParserException( - "unexpected character in markup "+printable(ch), this, null); + "unexpected character in markup "+printable(ch), this, null); } // do content comapctation if it makes sense!!!! @@ -223,8 +223,8 @@ entityRefName = newString(buf, posStart, posEnd - posStart); } throw new XmlPullParserException( - "could not resolve entity named '"+printable(entityRefName)+"'", - this, null); + "could not resolve entity named '"+printable(entityRefName)+"'", + this, null); } //int entStart = posStart; //int entEnd = posEnd; @@ -289,7 +289,7 @@ } } else if(seenBracketBracket && ch == '>') { throw new XmlPullParserException( - "characters ]]> are not allowed in content", this, null); + "characters ]]> are not allowed in content", this, null); } else { if(seenBracket) { seenBracketBracket = seenBracket = false; @@ -363,27 +363,50 @@ reader = oldReader; inputEncoding = oldEncoding; } - - /** - * Makes sure that each individual character is a valid XML character. - * - * Note that when MXParser is being modified to handle multibyte chars correctly, this method needs to change (as - * then, there are more codepoints to check). - */ + + private boolean highSurrogateSeen = false; + + /** + * Makes sure that each individual character is a valid XML character. + * + * Note that when MXParser is being modified to handle multibyte chars correctly, this method needs to change (as + * then, there are more codepoints to check). + * + */ @Override protected char more() throws IOException, XmlPullParserException { - final char codePoint = super.more(); // note - this does NOT return a codepoint now, but simply a (single byte) character! - if ((codePoint == 0x0) || // 0x0 is not allowed, but flash clients insist on sending this as the very first character of a stream. We should stop allowing this codepoint after the first byte has been parsed. - (codePoint == 0x9) || - (codePoint == 0xA) || - (codePoint == 0xD) || - ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || - ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || - ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) { - return codePoint; - } - - throw new XmlPullParserException("Illegal XML character: " + Integer.parseInt(codePoint+"", 16)); + final char codePoint = super.more(); // note - this does NOT return a codepoint now, but simply a (double byte) character! + boolean validCodepoint = false; + boolean isLowSurrogate = Character.isLowSurrogate(codePoint); + if ((codePoint == 0x0) || // 0x0 is not allowed, but flash clients insist on sending this as the very first character of a stream. We should stop allowing this codepoint after the first byte has been parsed. + (codePoint == 0x9) || + (codePoint == 0xA) || + (codePoint == 0xD) || + ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || + ((codePoint >= 0xE000) && (codePoint <= 0xFFFD))) { + validCodepoint = true; + } + else if (highSurrogateSeen) { + if (isLowSurrogate) { + validCodepoint = true; + } else { + throw new XmlPullParserException("High surrogate followed by non low surrogate '0x" + String.format("%x", (int) codePoint) + "'"); + } + } + else if (isLowSurrogate) { + throw new XmlPullParserException("Low surrogate '0x " + String.format("%x", (int) codePoint) + " without preceeding high surrogate"); + } + else if (Character.isHighSurrogate(codePoint)) { + highSurrogateSeen = true; + // Return here so that highSurrogateSeen is not reset + return codePoint; + } + // Always reset high surrogate seen + highSurrogateSeen = false; + if (validCodepoint) + return codePoint; + + throw new XmlPullParserException("Illegal XML character '0x" + String.format("%x", (int) codePoint) + "'"); } }