001/*
002 *  Copyright 2012 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.core.util;
017
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
028
029import org.apache.commons.codec.binary.Base64;
030import org.slf4j.Logger;
031import org.slf4j.LoggerFactory;
032
033import org.ametys.runtime.i18n.I18nizableText;
034
035/**
036 * A collection of String management utility methods.
037 */
038public final class StringUtils
039{
    /** Logger of this utility class. */
    private static final Logger __LOGGER = LoggerFactory.getLogger(StringUtils.class);

    /** Threshold from which a data size is divided and expressed with the next unit key. */
    private static final long __DATA_SIZE_NEXT_LIMIT = 1024;
    /** I18n keys of the data size units, in the order they are tried (bytes first, TB last). */
    private static final List<String> __DATA_SIZE_KEYS = List.of(
        "PLUGINS_CORE_UI_FORMAT_FILE_SIZE_NOT_ESCAPED_BYTES",
        "PLUGINS_CORE_UI_FORMAT_FILE_SIZE_NOT_ESCAPED_KB",
        "PLUGINS_CORE_UI_FORMAT_FILE_SIZE_NOT_ESCAPED_MB",
        "PLUGINS_CORE_UI_FORMAT_FILE_SIZE_NOT_ESCAPED_GB",
        "PLUGINS_CORE_UI_FORMAT_FILE_SIZE_NOT_ESCAPED_TB"
    );
    
    /** Leading strings that make the sanitize methods prepend a single quote to the value. */
    private static final String[] __CSV_BEGIN_CHARS = {"=", "@", "+", "-", "\r", "\t"};
    /** Quote character enclosing a CSV cell value. */
    private static final char __CSV_QUOTE = '"';
    /** String form of the quote character, used to double the quotes found inside a value. */
    private static final String __CSV_QUOTE_STR = String.valueOf(__CSV_QUOTE);
054    
055    private StringUtils()
056    {
057        // empty private constructor
058    }
059    
060    /**
061     * Extract String values from a comma seprated list.
062     * @param values the comma separated list
063     * @return a collection of String or an empty collection if string is null or empty.
064     */
065    public static Collection<String> stringToCollection(String values)
066    {
067        Collection<String> result = new ArrayList<>();
068        if (values != null && values.length() > 0)
069        {
070            // Explore the string list with a stringtokenizer with ','.
071            StringTokenizer stk = new StringTokenizer(values, ",");
072
073            while (stk.hasMoreTokens())
074            {
075                // Don't forget to trim
076                result.add(stk.nextToken().trim());
077            }
078        }
079
080        return result;
081    }
082
083    /**
084     * Extract String values from a comma seprated list.
085     * @param values the comma separated list
086     * @return an array of String
087     */
088    public static String[] stringToStringArray(String values)
089    {
090        Collection<String> coll = stringToCollection(values);
091        return coll.toArray(new String[coll.size()]);
092    }
093    
094    /**
095     * Generates a unique String key, based on System.currentTimeMillis()
096     * @return a unique String value
097     */
098    public static String generateKey()
099    {
100        long value;
101        
102        // Find a new value
103        synchronized (StringUtils.class)
104        {
105            value = System.currentTimeMillis();
106
107            try
108            {
109                Thread.sleep(15);
110            }
111            catch (InterruptedException e)
112            {
113                // does nothing, continue
114            }
115        }
116
117        // Convert it to a string using radix 36 (more compact)
118        String longString = Long.toString(value, Character.MAX_RADIX);
119    
120        return longString;
121    }
122    
123    /**
124     * Encrypt a password by using first MD5 Hash and base64 encoding.
125     * @param password The password to be encrypted.
126     * @return The password encrypted or null if the MD5 is not supported
127     */
128    public static String md5Base64(String password)
129    {
130        if (password == null)
131        {
132            return null;
133        }
134        
135        MessageDigest md5;
136        try
137        {
138            md5 = MessageDigest.getInstance("MD5");
139        }
140        catch (NoSuchAlgorithmException e)
141        {
142            // This error exception not be raised since MD5 is embedded in the JDK
143            __LOGGER.error("Cannot encode the password to md5Base64", e);
144            return null;
145        }
146        
147        // MD5-hash the password.
148        md5.reset();
149        try
150        {
151            md5.update(password.getBytes("UTF-8"));
152        }
153        catch (UnsupportedEncodingException e)
154        {
155            throw new IllegalStateException(e);
156        }
157        byte [] hash = md5.digest();
158        
159        // Base64-encode the result.
160        try
161        {
162            return new String(Base64.encodeBase64(hash), "UTF-8");
163        }
164        catch (UnsupportedEncodingException e)
165        {
166            throw new IllegalStateException(e);
167        }
168    }
169    
170    /**
171     * Normalize string. Pass to lower case and remove Unicode accents and diacritics
172     * @param value the value to normalize
173     * @return the normalized value
174     */
175    public static String normalizeStringValue(String value)
176    {
177        return Normalizer.normalize(value.toLowerCase(), Normalizer.Form.NFD).replaceAll("[\\p{InCombiningDiacriticalMarks}]", "");
178    }
179    
180    /**
181     * Transform a size to a readable size for data (bytes, KB, MB, etc.).
182     * @param size The size to transform
183     * @return An internationalized text with the size and the unit.
184     */
185    public static I18nizableText toReadableDataSize(Long size)
186    {
187        if (size == 1L)
188        {
189            return _createReadatableDataSize(size, "PLUGINS_CORE_UI_FORMAT_FILE_SIZE_NOT_ESCAPED_BYTE");
190        }
191        return _toReadableDataSize(size, __DATA_SIZE_KEYS.iterator());
192    }
193    
194    private static I18nizableText _toReadableDataSize(Long size, Iterator<String> keys)
195    {
196        String key = keys.next();
197        if (!keys.hasNext() || size < __DATA_SIZE_NEXT_LIMIT)
198        {
199            return _createReadatableDataSize(size, key);
200        }
201        return _toReadableDataSize(size / __DATA_SIZE_NEXT_LIMIT, keys);
202    }
203    
204    private static I18nizableText _createReadatableDataSize(Long size, String key)
205    {
206        return new I18nizableText("plugin.core-ui", key, List.of(size.toString()));
207    }
208    
209    /**
210     * Returns a escaped {@code String} value for a CSV cell enclosed in double quotes.
211     *
212     * <p>Any double quote characters in the value are escaped with another double quote.</p>
213     * <p>If cell value contains a formula (ex: =SOMME(A0:A10)) it could be evaluated by CVS editor.<br>
214     * Use {@link #sanitizeCsv(String)} to avoid formula evaluation</p>
215     * 
216     * <pre>
217     * null                          => ""
218     * =1+2                          => "=1+2"
219     * =1+2'" ;,=1+2                 => "=1+2'"" ;,=1+2"
220     * L'orem ipsut; sit amet, dolor => "L'orem ipsut; sit amet, dolor"
221     * =cmd|' /c Calc.exe'!'A1'      => "=cmd|' /c Calc.exe'!'A1'"
222     * </pre>
223     *
224     * @param value the String value for CSV column. Can be null.
225     * @return the escaped value
226     */
227    public static String escapeCsv(String value)
228    {
229        StringBuilder sb = new StringBuilder();
230        
231        sb.append(__CSV_QUOTE);
232        
233        if (org.apache.commons.lang3.StringUtils.isNotEmpty(value))
234        {
235            sb.append(org.apache.commons.lang3.StringUtils.replace(value, __CSV_QUOTE_STR, __CSV_QUOTE_STR + __CSV_QUOTE_STR));
236        }
237        
238        sb.append(__CSV_QUOTE);
239        
240        return sb.toString();
241    }
242    
243    /**
244     * Returns a sanitized {@code String} value for a CSV column enclosed in double quotes.
245     *
246     * <p>Any double quote characters in the value are escaped with another double quote.</p>
247     * 
248     * <p>If the value starts with '=', '+', '-', '@', newline or TAB, is prepend with a single quote.</p>
249     * 
250     * <pre>
251     * null                          => ""
252     * =1+2";=1+2                    => "'=1+2"";=1+2"
253     * =1+2'" ;,=1+2                 => "'=1+2'"" ;,=1+2"
254     * L'orem ipsut; sit amet, dolor => "L'orem ipsut; sit amet, dolor"
255     * =cmd|' /c Calc.exe'!'A1'      => "'=cmd|' /c Calc.exe'!'A1'"
256     * </pre>
257     *
258     * @param value the untrusted String value for CSV column. Can be null.
259     * @return the escaped and trusted value
260     */
261    public static String sanitizeCsv(String value)
262    {
263        StringBuilder sb = new StringBuilder();
264        
265        sb.append(__CSV_QUOTE);
266        
267        if (org.apache.commons.lang3.StringUtils.isNotEmpty(value))
268        {
269            if (org.apache.commons.lang3.StringUtils.startsWithAny(value, __CSV_BEGIN_CHARS))
270            {
271                sb.append("'");
272            }
273            sb.append(org.apache.commons.lang3.StringUtils.replace(value, __CSV_QUOTE_STR, __CSV_QUOTE_STR + __CSV_QUOTE_STR));
274        }
275        
276        sb.append(__CSV_QUOTE);
277        
278        return sb.toString();
279    }
280    
281    /**
282     * Returns a sanitized {@code String} value for a XLS-HTML column (no double quotes enclosing).
283     *
284     * <p>If the value starts with '=', '+', '-', '@', newline or TAB, is prepend with a single quote.</p>
285     * 
286     * <pre>
287     * null                          => StringUtils.EMPTY
288     * =1+2";=1+2                    => '=1+2";=1+2
289     * =1+2'" ;,=1+2                 => '=1+2'" ;,=1+2
290     * L'orem ipsut; sit amet, dolor => L'orem ipsut; sit amet, dolor
291     * =cmd|' /c Calc.exe'!'A1'      => '=cmd|' /c Calc.exe'!'A1
292     * </pre>
293     *
294     * @param value the untrusted String value for HTML-XLS column. Can be null.
295     * @return the trusted value
296     */
297    public static String sanitizeXlsHtml(String value)
298    {
299        if (org.apache.commons.lang3.StringUtils.isNotEmpty(value) && org.apache.commons.lang3.StringUtils.startsWithAny(value, __CSV_BEGIN_CHARS))
300        {
301            return "'" + value;
302        }
303        
304        return org.apache.commons.lang3.StringUtils.defaultString(value);
305    }
306    
307    /**
308     * Compares two strings ignoring case and accents and honoring natural numbers ordering.
309     */
310    public static class AlphanumComparator implements Comparator<String>
311    {
312        public int compare(String s1, String s2)
313        {
314            // Lowercase and replace accented characters with their non-accented equivalents
315            String normalizedS1 = Normalizer.normalize(s1.toLowerCase(), Normalizer.Form.NFD).replaceAll("[\\p{InCombiningDiacriticalMarks}]", "").trim();
316            String normalizedS2 = Normalizer.normalize(s2.toLowerCase(), Normalizer.Form.NFD).replaceAll("[\\p{InCombiningDiacriticalMarks}]", "").trim();
317            
318            int s1Index = 0;
319            int s2Index = 0;
320            
321            while (s1Index < normalizedS1.length() && s2Index < normalizedS2.length())
322            {
323                char s1Char = normalizedS1.charAt(s1Index);
324                char s2Char = normalizedS2.charAt(s2Index);
325
326                if (Character.isDigit(s1Char) && Character.isDigit(s2Char))
327                {
328                    int s1Start = s1Index;
329                    int s2Start = s2Index;
330
331                    while (s1Index < normalizedS1.length() && Character.isDigit(normalizedS1.charAt(s1Index)))
332                    {
333                        s1Index++;
334                    }
335                    while (s2Index < normalizedS2.length() && Character.isDigit(normalizedS2.charAt(s2Index)))
336                    {
337                        s2Index++;
338                    }
339
340                    int num1 = Integer.parseInt(normalizedS1.substring(s1Start, s1Index));
341                    int num2 = Integer.parseInt(normalizedS2.substring(s2Start, s2Index));
342                    if (num1 != num2)
343                    {
344                        return num1 - num2;
345                    }
346                }
347                else
348                {
349                    if (s1Char != s2Char)
350                    {
351                        return s1Char - s2Char;
352                    }
353                    s1Index++;
354                    s2Index++;
355                }
356            }
357            return normalizedS1.length() - normalizedS2.length();
358        }
359    }
360}