StringUtils.java

  1. /**
  2.  *
  3.  * Copyright 2003-2007 Jive Software, 2016-2024 Florian Schmaus.
  4.  *
  5.  * Licensed under the Apache License, Version 2.0 (the "License");
  6.  * you may not use this file except in compliance with the License.
  7.  * You may obtain a copy of the License at
  8.  *
  9.  *     http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.jivesoftware.smack.util;

  18. import java.io.IOException;
  19. import java.nio.CharBuffer;
  20. import java.nio.charset.StandardCharsets;
  21. import java.util.ArrayList;
  22. import java.util.Arrays;
  23. import java.util.Collection;
  24. import java.util.Iterator;
  25. import java.util.List;
  26. import java.util.Random;
  27. import java.util.regex.Pattern;

  28. /**
  29.  * A collection of utility methods for String objects.
  30.  */
  31. public class StringUtils {

  32.     public static final String MD5 = "MD5";
  33.     public static final String SHA1 = "SHA-1";

  34.     public static final String QUOTE_ENCODE = """;
  35.     public static final String APOS_ENCODE = "'";
  36.     public static final String AMP_ENCODE = "&";
  37.     public static final String LT_ENCODE = "<";
  38.     public static final String GT_ENCODE = ">";

  39.     public static final char[] HEX_CHARS = "0123456789abcdef".toCharArray();

  40.     /**
  41.      * Escape <code>input</code> for XML.
  42.      *
  43.      * @param input the input to escape.
  44.      * @return the XML escaped variant of <code>input</code>.
  45.      */
  46.     public static CharSequence escapeForXml(CharSequence input) {
  47.         return escapeForXml(input, XmlEscapeMode.safe);
  48.     }

  49.     /**
  50.      * Escape <code>input</code> for XML.
  51.      *
  52.      * @param input the input to escape.
  53.      * @return the XML escaped variant of <code>input</code>.
  54.      * @since 4.2
  55.      */
  56.     public static CharSequence escapeForXmlAttribute(CharSequence input) {
  57.         return escapeForXml(input, XmlEscapeMode.forAttribute);
  58.     }

  59.     /**
  60.      * Escape <code>input</code> for XML.
  61.      * <p>
  62.      * This is an optimized variant of {@link #escapeForXmlAttribute(CharSequence)} for XML where the
  63.      * XML attribute is quoted using ''' (Apos).
  64.      * </p>
  65.      *
  66.      * @param input the input to escape.
  67.      * @return the XML escaped variant of <code>input</code>.
  68.      * @since 4.2
  69.      */
  70.     public static CharSequence escapeForXmlAttributeApos(CharSequence input) {
  71.         return escapeForXml(input, XmlEscapeMode.forAttributeApos);
  72.     }

  73.     /**
  74.      * Escape <code>input</code> for XML.
  75.      *
  76.      * @param input the input to escape.
  77.      * @return the XML escaped variant of <code>input</code>.
  78.      * @since 4.2
  79.      */
  80.     public static CharSequence escapeForXmlText(CharSequence input) {
  81.         return escapeForXml(input, XmlEscapeMode.forText);
  82.     }

  83.     private enum XmlEscapeMode {
  84.         safe,
  85.         forAttribute,
  86.         forAttributeApos,
  87.         forText,
  88.     }

  89.     /**
  90.      * Escapes all necessary characters in the CharSequence so that it can be used
  91.      * in an XML doc.
  92.      *
  93.      * @param input the CharSequence to escape.
  94.      * @return the string with appropriate characters escaped.
  95.      */
  96.     private static CharSequence escapeForXml(final CharSequence input, final XmlEscapeMode xmlEscapeMode) {
  97.         if (input == null) {
  98.             return null;
  99.         }
  100.         final int len = input.length();
  101.         final StringBuilder out = new StringBuilder((int) (len * 1.3));
  102.         CharSequence toAppend;
  103.         char ch;
  104.         int last = 0;
  105.         int i = 0;
  106.         while (i < len) {
  107.             toAppend = null;
  108.             ch = input.charAt(i);
  109.             switch (xmlEscapeMode) {
  110.             case safe:
  111.                 switch (ch) {
  112.                 case '<':
  113.                     toAppend = LT_ENCODE;
  114.                     break;
  115.                 case '>':
  116.                     toAppend = GT_ENCODE;
  117.                     break;
  118.                 case '&':
  119.                     toAppend = AMP_ENCODE;
  120.                     break;
  121.                 case '"':
  122.                     toAppend = QUOTE_ENCODE;
  123.                     break;
  124.                 case '\'':
  125.                     toAppend = APOS_ENCODE;
  126.                     break;
  127.                 default:
  128.                     break;
  129.                 }
  130.                 break;
  131.             case forAttribute:
  132.                 // No need to escape '>' for attributes.
  133.                 switch (ch) {
  134.                 case '<':
  135.                     toAppend = LT_ENCODE;
  136.                     break;
  137.                 case '&':
  138.                     toAppend = AMP_ENCODE;
  139.                     break;
  140.                 case '"':
  141.                     toAppend = QUOTE_ENCODE;
  142.                     break;
  143.                 case '\'':
  144.                     toAppend = APOS_ENCODE;
  145.                     break;
  146.                 default:
  147.                     break;
  148.                 }
  149.                 break;
  150.             case forAttributeApos:
  151.                 // No need to escape '>' and '"' for attributes using '\'' as quote.
  152.                 switch (ch) {
  153.                 case '<':
  154.                     toAppend = LT_ENCODE;
  155.                     break;
  156.                 case '&':
  157.                     toAppend = AMP_ENCODE;
  158.                     break;
  159.                 case '\'':
  160.                     toAppend = APOS_ENCODE;
  161.                     break;
  162.                 default:
  163.                     break;
  164.                 }
  165.                 break;
  166.             case forText:
  167.                 // No need to escape '"', '\'', and '>' for text.
  168.                 switch (ch) {
  169.                 case '<':
  170.                     toAppend = LT_ENCODE;
  171.                     break;
  172.                 case '&':
  173.                     toAppend = AMP_ENCODE;
  174.                     break;
  175.                 default:
  176.                     break;
  177.                 }
  178.                 break;
  179.             }
  180.             if (toAppend != null) {
  181.                 if (i > last) {
  182.                     out.append(input, last, i);
  183.                 }
  184.                 out.append(toAppend);
  185.                 last = ++i;
  186.             } else {
  187.                 i++;
  188.             }
  189.         }
  190.         if (last == 0) {
  191.             return input;
  192.         }
  193.         if (i > last) {
  194.             out.append(input, last, i);
  195.         }
  196.         return out;
  197.     }

  198.     /**
  199.      * Hashes a String using the SHA-1 algorithm and returns the result as a
  200.      * String of hexadecimal numbers. This method is synchronized to avoid
  201.      * excessive MessageDigest object creation. If calling this method becomes
  202.      * a bottleneck in your code, you may wish to maintain a pool of
  203.      * MessageDigest objects instead of using this method.
  204.      * <p>
  205.      * A hash is a one-way function -- that is, given an
  206.      * input, an output is easily computed. However, given the output, the
  207.      * input is almost impossible to compute. This is useful for passwords
  208.      * since we can store the hash and a hacker will then have a very hard time
  209.      * determining the original password.
  210.      *
  211.      * @param data the String to compute the hash of.
  212.      * @return a hashed version of the passed-in String
  213.      * @deprecated use {@link org.jivesoftware.smack.util.SHA1#hex(String)} instead.
  214.      */
  215.     @Deprecated
  216.     public static synchronized String hash(String data) {
  217.         return org.jivesoftware.smack.util.SHA1.hex(data);
  218.     }

  219.     /**
  220.      * Encodes an array of bytes as String representation of hexadecimal.
  221.      *
  222.      * @param bytes an array of bytes to convert to a hex string.
  223.      * @return generated hex string.
  224.      */
  225.     public static String encodeHex(byte[] bytes) {
  226.         char[] hexChars = new char[bytes.length * 2];
  227.         for (int j = 0; j < bytes.length; j++) {
  228.             int v = bytes[j] & 0xFF;
  229.             hexChars[j * 2] = HEX_CHARS[v >>> 4];
  230.             hexChars[j * 2 + 1] = HEX_CHARS[v & 0x0F];
  231.         }
  232.         return new String(hexChars);
  233.     }

  234.     public static byte[] toUtf8Bytes(String string) {
  235.         return string.getBytes(StandardCharsets.UTF_8);
  236.     }

  237.     /**
  238.      * 24 upper case characters from the latin alphabet and numbers without '0' and 'O'.
  239.      */
  240.     public static final String UNAMBIGUOUS_NUMBERS_AND_LETTERS_STRING = "123456789ABCDEFGHIJKLMNPQRSTUVWXYZ";

  241.     /**
  242.      * 24 upper case characters from the latin alphabet and numbers without '0' and 'O'.
  243.      */
  244.     private static final char[] UNAMBIGUOUS_NUMBERS_AND_LETTERS = UNAMBIGUOUS_NUMBERS_AND_LETTERS_STRING.toCharArray();

  245.     /**
  246.      * Returns a random String of numbers and letters (lower and upper case)
  247.      * of the specified length. The method uses the Random class that is
  248.      * built-in to Java which is suitable for low to medium grade security uses.
  249.      * This means that the output is only pseudo random, i.e., each number is
  250.      * mathematically generated so is not truly random.<p>
  251.      *
  252.      * The specified length must be at least one. If not, the method will return
  253.      * null.
  254.      *
  255.      * @param length the desired length of the random String to return.
  256.      * @return a random String of numbers and letters of the specified length.
  257.      */
  258.     public static String insecureRandomString(int length) {
  259.         return randomString(length, RandomUtil.RANDOM.get());
  260.     }

  261.     public static String secureOnlineAttackSafeRandomString() {
  262.         // 34^10 = 2.06e15 possible combinations. Which is enough to protect against online brute force attacks.
  263.         // See also https://www.grc.com/haystack.htm
  264.         final int REQUIRED_LENGTH = 10;

  265.         return randomString(RandomUtil.SECURE_RANDOM.get(), UNAMBIGUOUS_NUMBERS_AND_LETTERS, REQUIRED_LENGTH);
  266.     }

  267.     public static String secureUniqueRandomString() {
  268.         // 34^13 = 8.11e19 possible combinations, which is > 2^64.
  269.         final int REQUIRED_LENGTH = 13;

  270.         return randomString(RandomUtil.SECURE_RANDOM.get(), UNAMBIGUOUS_NUMBERS_AND_LETTERS, REQUIRED_LENGTH);
  271.     }

  272.     /**
  273.      * Generate a secure random string with is human readable. The resulting string consists of 24 upper case characters
  274.      * from the Latin alphabet and numbers without '0' and 'O', grouped into 4-characters chunks, e.g.
  275.      * "TWNK-KD5Y-MT3T-E1GS-DRDB-KVTW". The characters are randomly selected by a cryptographically secure pseudorandom
  276.      * number generator (CSPRNG).
  277.      * <p>
  278.      * The string can be used a backup "code" for secrets, and is in fact the same as the one backup code specified in
  279.      * XEP-0373 and the one used by the <a href="https://github.com/open-keychain/open-keychain/wiki/Backups">Backup
  280.      * Format v2 of OpenKeychain</a>.
  281.      * </p>
  282.      *
  283.      * @see <a href="https://xmpp.org/extensions/xep-0373.html#backup-encryption"> XEP-0373 §5.4 Encrypting the Secret
  284.      *      Key Backup</a>
  285.      * @return a human readable secure random string.
  286.      */
  287.     public static String secureOfflineAttackSafeRandomString() {
  288.         // 34^24 = 2^122.10 possible combinations. Which is enough to protect against offline brute force attacks.
  289.         // See also https://www.grc.com/haystack.htm
  290.         final int REQUIRED_LENGTH = 24;

  291.         return randomString(RandomUtil.SECURE_RANDOM.get(), UNAMBIGUOUS_NUMBERS_AND_LETTERS, REQUIRED_LENGTH);
  292.     }

  293.     private static final int RANDOM_STRING_CHUNK_SIZE = 4;

  294.     private static String randomString(Random random, char[] alphabet, int numRandomChars) {
  295.         // The buffer most hold the size of the requested number of random chars and the chunk separators ('-').
  296.         int bufferSize = numRandomChars + ((numRandomChars - 1) / RANDOM_STRING_CHUNK_SIZE);
  297.         CharBuffer charBuffer = CharBuffer.allocate(bufferSize);

  298.         try {
  299.             randomString(charBuffer, random, alphabet, numRandomChars);
  300.         } catch (IOException e) {
  301.             // This should never happen if we calculate the buffer size correctly.
  302.             throw new AssertionError(e);
  303.         }

  304.         return charBuffer.flip().toString();
  305.     }

  306.     private static void randomString(Appendable appendable, Random random, char[] alphabet, int numRandomChars)
  307.                     throws IOException {
  308.         for (int randomCharNum = 1; randomCharNum <= numRandomChars; randomCharNum++) {
  309.             int randomIndex = random.nextInt(alphabet.length);
  310.             char randomChar = alphabet[randomIndex];
  311.             appendable.append(randomChar);

  312.             if (randomCharNum % RANDOM_STRING_CHUNK_SIZE == 0 && randomCharNum < numRandomChars) {
  313.                 appendable.append('-');
  314.             }
  315.         }
  316.     }

  317.     public static String randomString(final int length) {
  318.         return randomString(length, RandomUtil.SECURE_RANDOM.get());
  319.     }

  320.     public static String randomString(final int length, Random random) {
  321.         if (length == 0) {
  322.             return "";
  323.         }

  324.         char[] randomChars = new char[length];
  325.         for (int i = 0; i < length; i++) {
  326.             int index = random.nextInt(UNAMBIGUOUS_NUMBERS_AND_LETTERS.length);
  327.             randomChars[i] = UNAMBIGUOUS_NUMBERS_AND_LETTERS[index];
  328.         }
  329.         return new String(randomChars);
  330.     }

  331.     /**
  332.      * Returns true if CharSequence is not null and is not empty, false otherwise.
  333.      * Examples:
  334.      *    isNotEmpty(null) - false
  335.      *    isNotEmpty("") - false
  336.      *    isNotEmpty(" ") - true
  337.      *    isNotEmpty("empty") - true
  338.      *
  339.      * @param cs checked CharSequence
  340.      * @return true if string is not null and is not empty, false otherwise
  341.      */
  342.     public static boolean isNotEmpty(CharSequence cs) {
  343.         return !isNullOrEmpty(cs);
  344.     }

  345.     /**
  346.      * Returns true if the given CharSequence is null or empty.
  347.      *
  348.      * @param cs TODO javadoc me please
  349.      * @return true if the given CharSequence is null or empty
  350.      */
  351.     public static boolean isNullOrEmpty(CharSequence cs) {
  352.         return cs == null || isEmpty(cs);
  353.     }

  354.     /**
  355.      * Returns true if all given CharSequences are not empty.
  356.      *
  357.      * @param css the CharSequences to test.
  358.      * @return true if all given CharSequences are not empty.
  359.      */
  360.     public static boolean isNotEmpty(CharSequence... css) {
  361.         for (CharSequence cs : css) {
  362.             if (StringUtils.isNullOrEmpty(cs)) {
  363.                 return false;
  364.             }
  365.         }
  366.         return true;
  367.     }

  368.     /**
  369.      * Returns true if all given CharSequences are either null or empty.
  370.      *
  371.      * @param css the CharSequences to test.
  372.      * @return true if all given CharSequences are null or empty.
  373.      */
  374.     public static boolean isNullOrEmpty(CharSequence... css) {
  375.         for (CharSequence cs : css) {
  376.             if (StringUtils.isNotEmpty(cs)) {
  377.                 return false;
  378.             }
  379.         }
  380.         return true;
  381.     }

  382.     public static boolean isNullOrNotEmpty(CharSequence cs) {
  383.         if (cs == null) {
  384.             return true;
  385.         }
  386.         return !cs.toString().isEmpty();
  387.     }

  388.     /**
  389.      * Returns true if the given CharSequence is empty.
  390.      *
  391.      * @param cs TODO javadoc me please
  392.      * @return true if the given CharSequence is empty
  393.      */
  394.     public static boolean isEmpty(CharSequence cs) {
  395.         return cs.length() == 0;
  396.     }

  397.     /**
  398.      * Transform a collection of objects to a whitespace delimited String.
  399.      *
  400.      * @param collection the collection to transform.
  401.      * @return a String with all the elements of the collection.
  402.      */
  403.     public static String collectionToString(Collection<? extends Object> collection) {
  404.         return toStringBuilder(collection, " ").toString();
  405.     }

  406.     /**
  407.      * Transform a collection of objects to a delimited String.
  408.      *
  409.      * @param collection the collection to transform.
  410.      * @param delimiter the delimiter used to delimit the Strings.
  411.      * @return a StringBuilder with all the elements of the collection.
  412.      */
  413.     public static StringBuilder toStringBuilder(Collection<? extends Object> collection, String delimiter) {
  414.         StringBuilder sb = new StringBuilder(collection.size() * 20);
  415.         appendTo(collection, delimiter, sb);
  416.         return sb;
  417.     }

  418.     public static void appendTo(Collection<? extends Object> collection, StringBuilder sb) {
  419.         appendTo(collection, ", ", sb);
  420.     }

  421.     public static <O extends Object> void appendTo(Collection<O> collection, StringBuilder sb,
  422.                     Consumer<O> appendFunction) {
  423.         appendTo(collection, ", ", sb, appendFunction);
  424.     }

  425.     public static void appendTo(Collection<? extends Object> collection, String delimiter, StringBuilder sb) {
  426.         appendTo(collection, delimiter, sb, o -> sb.append(o));
  427.     }

  428.     public static <O extends Object> void appendTo(Collection<O> collection, String delimiter, StringBuilder sb,
  429.                     Consumer<O> appendFunction) {
  430.         for (Iterator<O> it = collection.iterator(); it.hasNext();) {
  431.             O cs = it.next();
  432.             appendFunction.accept(cs);
  433.             if (it.hasNext()) {
  434.                 sb.append(delimiter);
  435.             }
  436.         }
  437.     }

  438.     public static String returnIfNotEmptyTrimmed(String string) {
  439.         if (string == null)
  440.             return null;
  441.         String trimmedString = string.trim();
  442.         if (trimmedString.length() > 0) {
  443.             return trimmedString;
  444.         } else {
  445.             return null;
  446.         }
  447.     }

  448.     public static boolean nullSafeCharSequenceEquals(CharSequence csOne, CharSequence csTwo) {
  449.         return nullSafeCharSequenceComparator(csOne, csTwo) == 0;
  450.     }

  451.     public static int nullSafeCharSequenceComparator(CharSequence csOne, CharSequence csTwo) {
  452.         if (csOne == null ^ csTwo == null) {
  453.             return (csOne == null) ? -1 : 1;
  454.         }
  455.         if (csOne == null && csTwo == null) {
  456.             return 0;
  457.         }
  458.         return csOne.toString().compareTo(csTwo.toString());
  459.     }

  460.     /**
  461.      * Require a {@link CharSequence} to be neither null, nor empty.
  462.      *
  463.      * @deprecated use {@link #requireNotNullNorEmpty(CharSequence, String)} instead.
  464.      * @param cs CharSequence
  465.      * @param message error message
  466.      * @param <CS> CharSequence type
  467.      * @return cs TODO javadoc me please
  468.      */
  469.     @Deprecated
  470.     public static <CS extends CharSequence> CS requireNotNullOrEmpty(CS cs, String message) {
  471.         return requireNotNullNorEmpty(cs, message);
  472.     }

  473.     /**
  474.      * Require a {@link CharSequence} to be neither null, nor empty.
  475.      *
  476.      * @param cs CharSequence
  477.      * @param message error message
  478.      * @param <CS> CharSequence type
  479.      * @return cs TODO javadoc me please
  480.      */
  481.     public static <CS extends CharSequence> CS requireNotNullNorEmpty(CS cs, String message) {
  482.         if (isNullOrEmpty(cs)) {
  483.             throw new IllegalArgumentException(message);
  484.         }
  485.         return cs;
  486.     }

  487.     public static <CS extends CharSequence> CS requireNullOrNotEmpty(CS cs, String message) {
  488.         if (cs == null) {
  489.             return null;
  490.         }
  491.         if (isEmpty(cs)) {
  492.             throw new IllegalArgumentException(message);
  493.         }
  494.         return cs;
  495.     }

  496.     /**
  497.      * Return the String representation of the given char sequence if it is not null.
  498.      *
  499.      * @param cs the char sequence or null.
  500.      * @return the String representation of <code>cs</code> or null.
  501.      */
  502.     public static String maybeToString(CharSequence cs) {
  503.         if (cs == null) {
  504.             return null;
  505.         }
  506.         return cs.toString();
  507.     }

  508.     /**
  509.      * Defined by XML 1.0 § 2.3 as:
  510.      *  S      ::=      (#x20 | #x9 | #xD | #xA)+
  511.      *
  512.      * @see <a href="https://www.w3.org/TR/xml/#sec-white-space">XML 1.0 § 2.3</a>
  513.      */
  514.     private static final Pattern XML_WHITESPACE = Pattern.compile("[\t\n\r ]");

  515.     public static String deleteXmlWhitespace(String string) {
  516.         return XML_WHITESPACE.matcher(string).replaceAll("");
  517.     }

  518.     public static Appendable appendHeading(Appendable appendable, String heading) throws IOException {
  519.         return appendHeading(appendable, heading, '-');
  520.     }

  521.     public static Appendable appendHeading(Appendable appendable, String heading, char underlineChar) throws IOException {
  522.         appendable.append(heading).append('\n');
  523.         for (int i = 0; i < heading.length(); i++) {
  524.             appendable.append(underlineChar);
  525.         }
  526.         return appendable.append('\n');
  527.     }

  528.     public static final String PORTABLE_NEWLINE_REGEX = "\\r?\\n";

  529.     public static List<String> splitLinesPortable(String input) {
  530.         String[] lines = input.split(PORTABLE_NEWLINE_REGEX);
  531.         return Arrays.asList(lines);
  532.     }

  533.     public static List<String> toStrings(Collection<? extends CharSequence> charSequences) {
  534.         List<String> res = new ArrayList<>(charSequences.size());
  535.         for (CharSequence cs : charSequences) {
  536.             String string = cs.toString();
  537.             res.add(string);
  538.         }
  539.         return res;
  540.     }
  541. }