@@ -61,6 +61,42 @@ public class XML {
6161
6262 /** The Character '/'. */
6363 public static final Character SLASH = '/' ;
64+
65+ /**
66+ * Creates an iterator for navigating Code Points in a string instead of
67+ * characters.
68+ *
69+ * @see <a href=
70+ * "http://stackoverflow.com/a/21791059/6030888">http://stackoverflow.com/a/21791059/6030888</a>
71+ */
72+ private static Iterable <Integer > codePointIterator (final String string ) {
73+ return new Iterable <Integer >() {
74+ @ Override
75+ public Iterator <Integer > iterator () {
76+ return new Iterator <Integer >() {
77+ private int nextIndex = 0 ;
78+ private int length = string .length ();
79+
80+ @ Override
81+ public boolean hasNext () {
82+ return this .nextIndex < this .length ;
83+ }
84+
85+ @ Override
86+ public Integer next () {
87+ int result = string .codePointAt (this .nextIndex );
88+ this .nextIndex += Character .charCount (result );
89+ return result ;
90+ }
91+
92+ @ Override
93+ public void remove () {
94+ throw new UnsupportedOperationException ();
95+ }
96+ };
97+ }
98+ };
99+ }
64100
65101 /**
66102 * Replace special characters with XML escapes:
@@ -79,8 +115,7 @@ public class XML {
79115 */
80116 public static String escape (String string ) {
81117 StringBuilder sb = new StringBuilder (string .length ());
82- for (int i = 0 , length = string .length (); i < length ; i ++) {
83- char c = string .charAt (i );
118+ for (final int c : codePointIterator (string )) {
84119 switch (c ) {
85120 case '&' :
86121 sb .append ("&" );
@@ -98,18 +133,18 @@ public static String escape(String string) {
98133 sb .append ("'" );
99134 break ;
100135 default :
101- if (c < ' ' || ( c >= '\u0080' && c < '\u00a0' ) || ( c >= '\u2000' && c < '\u2100' )) {
136+ if (Character . isISOControl ( c )) {
102137 sb .append ("&#x" );
103138 sb .append (Integer .toHexString (c ));
104139 sb .append (";" );
105140 } else {
106- sb .append (c );
141+ sb .append (new String ( Character . toChars ( c )) );
107142 }
108143 }
109144 }
110145 return sb .toString ();
111146 }
112-
147+
113148 /**
114149 * Removes XML escapes from the string.
115150 *
0 commit comments