001/*
002 *  Copyright 2020 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.core.util;
017
018import java.io.ByteArrayOutputStream;
019import java.nio.charset.StandardCharsets;
020import java.util.Map;
021import java.util.function.Predicate;
022
/**
 * Utility class for encoding and decoding URLs, following the RFC 3986.
 * <p>
 * Encoding works on the UTF-8 bytes of the input: every byte that is not explicitly allowed
 * for the targeted URI component is replaced by its upper-case percent-encoded form.
 * @see <a href="https://tools.ietf.org/html/rfc3986">https://tools.ietf.org/html/rfc3986</a>
 */
public final class URIUtils
{
    private static final String __NAME_VALUE_SEPARATOR = "=";
    private static final String __PARAMETER_SEPARATOR = "&";
    private static final String __QUERY_SEPARATOR = "?";
    
    // Character classes of the RFC 3986 grammar. They are tested against raw (signed) UTF-8 bytes:
    // a byte of a multi-byte sequence is negative, so it never matches any of these classes.
    private static final Predicate<Byte> _isAlpha = c -> c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';
    private static final Predicate<Byte> _isDigit = c -> c >= '0' && c <= '9';
    private static final Predicate<Byte> _isSubDelimiter = c -> '!' == c || '$' == c || '&' == c || '\'' == c || '(' == c || ')' == c 
                                                             || '*' == c || '+' == c || ',' == c || ';' == c || '=' == c;
    private static final Predicate<Byte> _isUnreserved = _isAlpha.or(_isDigit).or(c -> '-' == c || '.' == c || '_' == c || '~' == c);
    private static final Predicate<Byte> _isPchar = _isUnreserved.or(_isSubDelimiter).or(c -> ':' == c || '@' == c);
    
    // Characters kept as is by each public encoding method, composed once instead of on every call.
    private static final Predicate<Byte> _isFragmentChar = _isPchar.or(c -> '/' == c || '?' == c);
    private static final Predicate<Byte> _isParameterChar = _isFragmentChar.and(c -> '=' != c && '+' != c && '&' != c);
    private static final Predicate<Byte> _isPathSegmentChar = _isPchar.and(c -> ';' != c);
    private static final Predicate<Byte> _isPathChar = _isPchar.or(c -> '/' == c).and(c -> ';' != c);
    
    private URIUtils()
    {
        // utility class, not meant to be instantiated
    }
    
    /**
     * Encode a parameter's name or value using UTF-8 encoding.
     * The characters '=', '+' and '&amp;' are always percent-encoded.
     * @param value the value or the name of the request's parameter to encode. Cannot be null.
     * @return the encoded value.
     * @throws NullPointerException if value is null
     */
    public static String encodeParameter(String value)
    {
        return _encodeUriComponent(value, _isParameterChar);
    }
    
    /**
     * Encode a fragment's value using UTF-8 encoding
     * @param value the value or the name of the request's fragment to encode. Cannot be null.
     * @return the encoded value.
     * @throws NullPointerException if value is null
     */
    public static String encodeFragment(String value)
    {
        return _encodeUriComponent(value, _isFragmentChar);
    }
    
    /**
     * Encode an URL path. The '/' separators are kept as is, the ';' character is percent-encoded.
     * @param path the path to encode (before question-mark character). Cannot be null.
     * @return the encoded path.
     * @throws NullPointerException if path is null
     */
    public static String encodePath(String path)
    {
        return _encodeUriComponent(path, _isPathChar);
    }
    
    /**
     * Encode an URL path segment. Both the '/' and ';' characters are percent-encoded.
     * @param pathSegment the path segment to encode. Cannot be null.
     * @return the encoded path segment.
     * @throws NullPointerException if pathSegment is null
     */
    public static String encodePathSegment(String pathSegment)
    {
        return _encodeUriComponent(pathSegment, _isPathSegmentChar);
    }
    
    /**
     * Encode a request header value. Only the unreserved characters
     * (letters, digits, '-', '.', '_' and '~') are kept as is.
     * @param header the value to encode. Cannot be null.
     * @return the encoded value.
     * @throws NullPointerException if header is null
     */
    public static String encodeHeader(String header)
    {
        return _encodeUriComponent(header, _isUnreserved);
    }

    /**
     * Percent-encode the UTF-8 bytes of a URI component.
     * @param uriComponent the component to encode
     * @param charactersToKeep tells which bytes have to be written as is. All the other bytes are percent-encoded.
     * @return the encoded component, made of ASCII characters only
     */
    // Implementation taken from Spring's UriUtils#encode
    private static String _encodeUriComponent(String uriComponent, Predicate<Byte> charactersToKeep)
    {
        byte[] bytes = uriComponent.getBytes(StandardCharsets.UTF_8);
        ByteArrayOutputStream bos = new ByteArrayOutputStream(bytes.length);
        
        for (byte b : bytes) 
        {
            if (charactersToKeep.test(b))
            {
                bos.write(b);
            }
            else 
            {
                // The masks make the sign extension of negative bytes (>= 0x80) harmless,
                // so there is no need to convert the byte to an unsigned value first
                bos.write('%');
                bos.write(Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)));
                bos.write(Character.toUpperCase(Character.forDigit(b & 0xF, 16)));
            }
        }

        return new String(bos.toByteArray(), StandardCharsets.US_ASCII);
    }
    
    /**
     * Build an encoded URL: the path is encoded with {@link #encodePath(String)} and
     * the names and values of the parameters with {@link #encodeParameter(String)}.
     * @param path the path to encode (before question-mark character)
     * @param parameters the parameters. Can be null. A null value is written as an empty value.
     * @return the encoded URL
     */
    public static String encodeURI(String path, Map<String, String> parameters)
    {
        return _buildURI(path, parameters, true);
    }
    
    /**
     * Build an URL. Neither the path nor the parameters are encoded.
     * @param path the URL path.
     * @param parameters the URL parameters. Can be null. A null value is written as an empty value.
     * @return the computed URL.
     */
    public static String buildURI(String path, Map<String, String> parameters)
    {
        return _buildURI(path, parameters, false);
    }
    
    /**
     * Concatenate the path and the query string computed from the parameters.
     * @param path the URL path
     * @param parameters the URL parameters. Can be null or empty: no query string is appended then.
     * @param encode true to encode the path, the names and the values
     * @return the computed URL
     */
    private static String _buildURI(String path, Map<String, String> parameters, boolean encode)
    {
        StringBuilder sb = new StringBuilder();
        sb.append(encode ? encodePath(path) : path);
        
        if (parameters != null && !parameters.isEmpty())
        {
            boolean first = true;
            
            for (Map.Entry<String, String> parameter : parameters.entrySet())
            {
                String name = parameter.getKey();
                // A null value is handled as an empty one, instead of failing when encoding
                // or writing the "null" text in the URL
                String value = parameter.getValue() != null ? parameter.getValue() : "";
                
                sb.append(first ? __QUERY_SEPARATOR : __PARAMETER_SEPARATOR)
                  .append(encode ? encodeParameter(name) : name)
                  .append(__NAME_VALUE_SEPARATOR)
                  .append(encode ? encodeParameter(value) : value);
                
                first = false;
            }
        }
        
        return sb.toString();
    }
    
    /**
     * Decodes an URI-encoded String: each "%xx" sequence is replaced by the byte it stands for,
     * and the resulting bytes are read as UTF-8.
     * @param source the String to decode. Cannot be null.
     * @return the decoded String, or the source itself if it does not contain any encoded sequence.
     * @throws IllegalArgumentException if a '%' is not followed by two hexadecimal digits,
     *                                  or if a high surrogate is not followed by a low surrogate
     * @throws NullPointerException if source is null
     */
    // Implementation taken from Spring's UriUtils#decode
    public static String decode(String source)
    {
        int length = source.length();
        ByteArrayOutputStream bos = new ByteArrayOutputStream(length);
        boolean changed = false;
        
        int i = 0;
        while (i < length) 
        {
            char ch = source.charAt(i);
            
            if (ch == '%') 
            {
                if (i + 2 >= length) 
                {
                    throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
                }
                
                int u = Character.digit(source.charAt(i + 1), 16);
                int l = Character.digit(source.charAt(i + 2), 16);
                
                if (u == -1 || l == -1) 
                {
                    throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
                }
                
                bos.write((u << 4) + l);
                
                i += 3;
                changed = true;
            }
            else if (ch > 0 && ch < 128)
            {
                // to avoid the overhead of decoding/recoding an ASCII char
                bos.write(ch);
                i++;
            }
            else if (Character.isHighSurrogate(ch))
            {
                if (i + 1 >= length || !Character.isLowSurrogate(source.charAt(i + 1))) 
                {
                    throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
                }
                
                // The two chars of the pair have to be converted together
                byte[] pairBytes = source.substring(i, i + 2).getBytes(StandardCharsets.UTF_8);
                bos.write(pairBytes, 0, pairBytes.length);
                i += 2;
            }
            else 
            {
                byte[] charBytes = String.valueOf(ch).getBytes(StandardCharsets.UTF_8);
                bos.write(charBytes, 0, charBytes.length);
                i++;
            }
        }
        
        return changed ? new String(bos.toByteArray(), StandardCharsets.UTF_8) : source;
    }
}