001/**
002 *
003 * Copyright 2003-2007 Jive Software.
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.jivesoftware.smack.util;
019
020import java.io.UnsupportedEncodingException;
021import java.security.MessageDigest;
022import java.security.NoSuchAlgorithmException;
023import java.util.Random;
024import java.util.logging.Level;
025import java.util.logging.Logger;
026
027/**
028 * A collection of utility methods for String objects.
029 */
030public class StringUtils {
031    private static final Logger LOGGER = Logger.getLogger(StringUtils.class.getName());
032
033    public static final String QUOTE_ENCODE = """;
034    public static final String APOS_ENCODE = "'";
035    public static final String AMP_ENCODE = "&";
036    public static final String LT_ENCODE = "<";
037    public static final String GT_ENCODE = ">";
038
039    /**
040     * Returns the name portion of a XMPP address. For example, for the
041     * address "matt@jivesoftware.com/Smack", "matt" would be returned. If no
042     * username is present in the address, the empty string will be returned.
043     *
044     * @param XMPPAddress the XMPP address.
045     * @return the name portion of the XMPP address.
046     */
047    public static String parseName(String XMPPAddress) {
048        if (XMPPAddress == null) {
049            return null;
050        }
051        int atIndex = XMPPAddress.lastIndexOf("@");
052        if (atIndex <= 0) {
053            return "";
054        }
055        else {
056            return XMPPAddress.substring(0, atIndex);
057        }
058    }
059
060    /**
061     * Returns the server portion of a XMPP address. For example, for the
062     * address "matt@jivesoftware.com/Smack", "jivesoftware.com" would be returned.
063     * If no server is present in the address, the empty string will be returned.
064     *
065     * @param XMPPAddress the XMPP address.
066     * @return the server portion of the XMPP address.
067     */
068    public static String parseServer(String XMPPAddress) {
069        if (XMPPAddress == null) {
070            return null;
071        }
072        int atIndex = XMPPAddress.lastIndexOf("@");
073        // If the String ends with '@', return the empty string.
074        if (atIndex + 1 > XMPPAddress.length()) {
075            return "";
076        }
077        int slashIndex = XMPPAddress.indexOf("/");
078        if (slashIndex > 0 && slashIndex > atIndex) {
079            return XMPPAddress.substring(atIndex + 1, slashIndex);
080        }
081        else {
082            return XMPPAddress.substring(atIndex + 1);
083        }
084    }
085
086    /**
087     * Returns the resource portion of a XMPP address. For example, for the
088     * address "matt@jivesoftware.com/Smack", "Smack" would be returned. If no
089     * resource is present in the address, the empty string will be returned.
090     *
091     * @param XMPPAddress the XMPP address.
092     * @return the resource portion of the XMPP address.
093     */
094    public static String parseResource(String XMPPAddress) {
095        if (XMPPAddress == null) {
096            return null;
097        }
098        int slashIndex = XMPPAddress.indexOf("/");
099        if (slashIndex + 1 > XMPPAddress.length() || slashIndex < 0) {
100            return "";
101        }
102        else {
103            return XMPPAddress.substring(slashIndex + 1);
104        }
105    }
106
107    /**
108     * Returns the XMPP address with any resource information removed. For example,
109     * for the address "matt@jivesoftware.com/Smack", "matt@jivesoftware.com" would
110     * be returned.
111     *
112     * @param XMPPAddress the XMPP address.
113     * @return the bare XMPP address without resource information.
114     */
115    public static String parseBareAddress(String XMPPAddress) {
116        if (XMPPAddress == null) {
117            return null;
118        }
119        int slashIndex = XMPPAddress.indexOf("/");
120        if (slashIndex < 0) {
121            return XMPPAddress;
122        }
123        else if (slashIndex == 0) {
124            return "";
125        }
126        else {
127            return XMPPAddress.substring(0, slashIndex);
128        }
129    }
130
131    /**
132     * Returns true if jid is a full JID (i.e. a JID with resource part).
133     *
134     * @param jid
135     * @return true if full JID, false otherwise
136     */
137    public static boolean isFullJID(String jid) {
138        if (parseName(jid).length() <= 0 || parseServer(jid).length() <= 0
139                || parseResource(jid).length() <= 0) {
140            return false;
141        }
142        return true;
143    }
144
145    /**
146     * Escapes the node portion of a JID according to "JID Escaping" (JEP-0106).
147     * Escaping replaces characters prohibited by node-prep with escape sequences,
148     * as follows:<p>
149     *
150     * <table border="1">
151     * <tr><td><b>Unescaped Character</b></td><td><b>Encoded Sequence</b></td></tr>
152     * <tr><td>&lt;space&gt;</td><td>\20</td></tr>
153     * <tr><td>"</td><td>\22</td></tr>
154     * <tr><td>&</td><td>\26</td></tr>
155     * <tr><td>'</td><td>\27</td></tr>
156     * <tr><td>/</td><td>\2f</td></tr>
157     * <tr><td>:</td><td>\3a</td></tr>
158     * <tr><td>&lt;</td><td>\3c</td></tr>
159     * <tr><td>&gt;</td><td>\3e</td></tr>
160     * <tr><td>@</td><td>\40</td></tr>
161     * <tr><td>\</td><td>\5c</td></tr>
162     * </table><p>
163     *
164     * This process is useful when the node comes from an external source that doesn't
165     * conform to nodeprep. For example, a username in LDAP may be "Joe Smith". Because
166     * the &lt;space&gt; character isn't a valid part of a node, the username should
167     * be escaped to "Joe\20Smith" before being made into a JID (e.g. "joe\20smith@example.com"
168     * after case-folding, etc. has been applied).<p>
169     *
170     * All node escaping and un-escaping must be performed manually at the appropriate
171     * time; the JID class will not escape or un-escape automatically.
172     *
173     * @param node the node.
174     * @return the escaped version of the node.
175     */
176    public static String escapeNode(String node) {
177        if (node == null) {
178            return null;
179        }
180        StringBuilder buf = new StringBuilder(node.length() + 8);
181        for (int i=0, n=node.length(); i<n; i++) {
182            char c = node.charAt(i);
183            switch (c) {
184                case '"': buf.append("\\22"); break;
185                case '&': buf.append("\\26"); break;
186                case '\'': buf.append("\\27"); break;
187                case '/': buf.append("\\2f"); break;
188                case ':': buf.append("\\3a"); break;
189                case '<': buf.append("\\3c"); break;
190                case '>': buf.append("\\3e"); break;
191                case '@': buf.append("\\40"); break;
192                case '\\': buf.append("\\5c"); break;
193                default: {
194                    if (Character.isWhitespace(c)) {
195                        buf.append("\\20");
196                    }
197                    else {
198                        buf.append(c);
199                    }
200                }
201            }
202        }
203        return buf.toString();
204    }
205
206    /**
207     * Un-escapes the node portion of a JID according to "JID Escaping" (JEP-0106).<p>
208     * Escaping replaces characters prohibited by node-prep with escape sequences,
209     * as follows:<p>
210     *
211     * <table border="1">
212     * <tr><td><b>Unescaped Character</b></td><td><b>Encoded Sequence</b></td></tr>
213     * <tr><td>&lt;space&gt;</td><td>\20</td></tr>
214     * <tr><td>"</td><td>\22</td></tr>
215     * <tr><td>&</td><td>\26</td></tr>
216     * <tr><td>'</td><td>\27</td></tr>
217     * <tr><td>/</td><td>\2f</td></tr>
218     * <tr><td>:</td><td>\3a</td></tr>
219     * <tr><td>&lt;</td><td>\3c</td></tr>
220     * <tr><td>&gt;</td><td>\3e</td></tr>
221     * <tr><td>@</td><td>\40</td></tr>
222     * <tr><td>\</td><td>\5c</td></tr>
223     * </table><p>
224     *
225     * This process is useful when the node comes from an external source that doesn't
226     * conform to nodeprep. For example, a username in LDAP may be "Joe Smith". Because
227     * the &lt;space&gt; character isn't a valid part of a node, the username should
228     * be escaped to "Joe\20Smith" before being made into a JID (e.g. "joe\20smith@example.com"
229     * after case-folding, etc. has been applied).<p>
230     *
231     * All node escaping and un-escaping must be performed manually at the appropriate
232     * time; the JID class will not escape or un-escape automatically.
233     *
234     * @param node the escaped version of the node.
235     * @return the un-escaped version of the node.
236     */
237    public static String unescapeNode(String node) {
238        if (node == null) {
239            return null;
240        }
241        char [] nodeChars = node.toCharArray();
242        StringBuilder buf = new StringBuilder(nodeChars.length);
243        for (int i=0, n=nodeChars.length; i<n; i++) {
244            compare: {
245                char c = node.charAt(i);
246                if (c == '\\' && i+2<n) {
247                    char c2 = nodeChars[i+1];
248                    char c3 = nodeChars[i+2];
249                    if (c2 == '2') {
250                        switch (c3) {
251                            case '0': buf.append(' '); i+=2; break compare;
252                            case '2': buf.append('"'); i+=2; break compare;
253                            case '6': buf.append('&'); i+=2; break compare;
254                            case '7': buf.append('\''); i+=2; break compare;
255                            case 'f': buf.append('/'); i+=2; break compare;
256                        }
257                    }
258                    else if (c2 == '3') {
259                        switch (c3) {
260                            case 'a': buf.append(':'); i+=2; break compare;
261                            case 'c': buf.append('<'); i+=2; break compare;
262                            case 'e': buf.append('>'); i+=2; break compare;
263                        }
264                    }
265                    else if (c2 == '4') {
266                        if (c3 == '0') {
267                            buf.append("@");
268                            i+=2;
269                            break compare;
270                        }
271                    }
272                    else if (c2 == '5') {
273                        if (c3 == 'c') {
274                            buf.append("\\");
275                            i+=2;
276                            break compare;
277                        }
278                    }
279                }
280                buf.append(c);
281            }
282        }
283        return buf.toString();
284    }
285
286    /**
287     * Escapes all necessary characters in the String so that it can be used
288     * in an XML doc.
289     *
290     * @param string the string to escape.
291     * @return the string with appropriate characters escaped.
292     */
293    public static CharSequence escapeForXML(final String string) {
294        if (string == null) {
295            return null;
296        }
297        final char[] input = string.toCharArray();
298        final int len = input.length;
299        final StringBuilder out = new StringBuilder((int)(len*1.3));
300        CharSequence toAppend;
301        char ch;
302        int last = 0;
303        int i = 0;
304        while (i < len) {
305            toAppend = null;
306            ch = input[i];
307            switch(ch) {
308            case '<':
309                toAppend = LT_ENCODE;
310                break;
311            case '>':
312                toAppend = GT_ENCODE;
313                break;
314            case '&':
315                toAppend = AMP_ENCODE;
316                break;
317            case '"':
318                toAppend = QUOTE_ENCODE;
319                break;
320            case '\'':
321                toAppend = APOS_ENCODE;
322                break;
323            default:
324                break;
325            }
326            if (toAppend != null) {
327                if (i > last) {
328                    out.append(input, last, i - last);
329                }
330                out.append(toAppend);
331                last = ++i;
332            } else {
333                i++;
334            }
335        }
336        if (last == 0) {
337            return string;
338        }
339        if (i > last) {
340            out.append(input, last, i - last);
341        }
342        return out;
343    }
344
345    /**
346     * Used by the hash method.
347     */
348    private static MessageDigest digest = null;
349
350    /**
351     * Hashes a String using the SHA-1 algorithm and returns the result as a
352     * String of hexadecimal numbers. This method is synchronized to avoid
353     * excessive MessageDigest object creation. If calling this method becomes
354     * a bottleneck in your code, you may wish to maintain a pool of
355     * MessageDigest objects instead of using this method.
356     * <p>
357     * A hash is a one-way function -- that is, given an
358     * input, an output is easily computed. However, given the output, the
359     * input is almost impossible to compute. This is useful for passwords
360     * since we can store the hash and a hacker will then have a very hard time
361     * determining the original password.
362     *
363     * @param data the String to compute the hash of.
364     * @return a hashed version of the passed-in String
365     */
366    public synchronized static String hash(String data) {
367        if (digest == null) {
368            try {
369                digest = MessageDigest.getInstance("SHA-1");
370            }
371            catch (NoSuchAlgorithmException nsae) {
372                LOGGER.log(Level.SEVERE, "Failed to load the SHA-1 MessageDigest. Smack will be unable to function normally.", nsae);
373            }
374        }
375        // Now, compute hash.
376        try {
377            digest.update(data.getBytes("UTF-8"));
378        }
379        catch (UnsupportedEncodingException e) {
380            LOGGER.log(Level.SEVERE, "Error computing hash", e);
381        }
382        return encodeHex(digest.digest());
383    }
384
385    /**
386     * Encodes an array of bytes as String representation of hexadecimal.
387     *
388     * @param bytes an array of bytes to convert to a hex string.
389     * @return generated hex string.
390     */
391    public static String encodeHex(byte[] bytes) {
392        StringBuilder hex = new StringBuilder(bytes.length * 2);
393
394        for (byte aByte : bytes) {
395            if (((int) aByte & 0xff) < 0x10) {
396                hex.append("0");
397            }
398            hex.append(Integer.toString((int) aByte & 0xff, 16));
399        }
400
401        return hex.toString();
402    }
403
404    /**
405     * Encodes a String as a base64 String.
406     *
407     * @param data a String to encode.
408     * @return a base64 encoded String.
409     */
410    public static String encodeBase64(String data) {
411        byte [] bytes = null;
412        try {
413            bytes = data.getBytes("ISO-8859-1");
414        }
415        catch (UnsupportedEncodingException uee) {
416            throw new IllegalStateException(uee);
417        }
418        return encodeBase64(bytes);
419    }
420
421    /**
422     * Encodes a byte array into a base64 String.
423     *
424     * @param data a byte array to encode.
425     * @return a base64 encode String.
426     */
427    public static String encodeBase64(byte[] data) {
428        return encodeBase64(data, false);
429    }
430
431    /**
432     * Encodes a byte array into a bse64 String.
433     *
434     * @param data The byte arry to encode.
435     * @param lineBreaks True if the encoding should contain line breaks and false if it should not.
436     * @return A base64 encoded String.
437     */
438    public static String encodeBase64(byte[] data, boolean lineBreaks) {
439        return encodeBase64(data, 0, data.length, lineBreaks);
440    }
441
442    /**
443     * Encodes a byte array into a bse64 String.
444     *
445     * @param data The byte arry to encode.
446     * @param offset the offset of the bytearray to begin encoding at.
447     * @param len the length of bytes to encode.
448     * @param lineBreaks True if the encoding should contain line breaks and false if it should not.
449     * @return A base64 encoded String.
450     */
451    public static String encodeBase64(byte[] data, int offset, int len, boolean lineBreaks) {
452        return Base64.encodeBytes(data, offset, len, (lineBreaks ?  Base64.NO_OPTIONS : Base64.DONT_BREAK_LINES));
453    }
454
455    /**
456     * Decodes a base64 String.
457     * Unlike Base64.decode() this method does not try to detect and decompress a gzip-compressed input.
458     *
459     * @param data a base64 encoded String to decode.
460     * @return the decoded String.
461     */
462    public static byte[] decodeBase64(String data) {
463        byte[] bytes;
464        try {
465            bytes = data.getBytes("UTF-8");
466        } catch (java.io.UnsupportedEncodingException uee) {
467            bytes = data.getBytes();
468        }
469
470        bytes = Base64.decode(bytes, 0, bytes.length, Base64.NO_OPTIONS);
471        return bytes;
472    }
473
474    /**
475     * Pseudo-random number generator object for use with randomString().
476     * The Random class is not considered to be cryptographically secure, so
477     * only use these random Strings for low to medium security applications.
478     */
479    private static Random randGen = new Random();
480
481    /**
482     * Array of numbers and letters of mixed case. Numbers appear in the list
483     * twice so that there is a more equal chance that a number will be picked.
484     * We can use the array to get a random number or letter by picking a random
485     * array index.
486     */
487    private static char[] numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz" +
488                    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();
489
490    /**
491     * Returns a random String of numbers and letters (lower and upper case)
492     * of the specified length. The method uses the Random class that is
493     * built-in to Java which is suitable for low to medium grade security uses.
494     * This means that the output is only pseudo random, i.e., each number is
495     * mathematically generated so is not truly random.<p>
496     *
497     * The specified length must be at least one. If not, the method will return
498     * null.
499     *
500     * @param length the desired length of the random String to return.
501     * @return a random String of numbers and letters of the specified length.
502     */
503    public static String randomString(int length) {
504        if (length < 1) {
505            return null;
506        }
507        // Create a char buffer to put random letters and numbers in.
508        char [] randBuffer = new char[length];
509        for (int i=0; i<randBuffer.length; i++) {
510            randBuffer[i] = numbersAndLetters[randGen.nextInt(71)];
511        }
512        return new String(randBuffer);
513    }
514
515    /**
516     * Returns true if CharSequence is not null and is not empty, false otherwise
517     * Examples:
518     *    isNotEmpty(null) - false
519     *    isNotEmpty("") - false
520     *    isNotEmpty(" ") - true
521     *    isNotEmpty("empty") - true
522     *
523     * @param cs checked CharSequence
524     * @return true if string is not null and is not empty, false otherwise
525     */
526    public static boolean isNotEmpty(CharSequence cs) {
527        return !isNullOrEmpty(cs);
528    }
529
530    /**
531     * Returns true if the given CharSequence is null or empty.
532     *
533     * @param cs
534     * @return true if the given CharSequence is null or empty
535     */
536    public static boolean isNullOrEmpty(CharSequence cs) {
537        return cs == null || isEmpty(cs);
538    }
539
540    /**
541     * Returns true if the given CharSequence is empty
542     * 
543     * @param cs
544     * @return true if the given CharSequence is empty
545     */
546    public static boolean isEmpty(CharSequence cs) {
547        return cs.length() == 0;
548    }
549
550    public static boolean nullSafeCharSequenceEquals(CharSequence csOne, CharSequence csTwo) {
551        return nullSafeCharSequenceComperator(csOne, csTwo) == 0;
552    }
553
554    public static int nullSafeCharSequenceComperator(CharSequence csOne, CharSequence csTwo) {
555        if (csOne == null ^ csTwo == null) {
556            return (csOne == null) ? -1 : 1;
557        }
558        if (csOne == null && csTwo == null) {
559            return 0;
560        }
561        return csOne.toString().compareTo(csTwo.toString());
562    }
563}