001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.fileupload;
018
019import java.io.UnsupportedEncodingException;
020import java.util.HashMap;
021import java.util.Locale;
022import java.util.Map;
023
024import org.apache.commons.fileupload.util.mime.MimeUtility;
025
026/**
027 * A simple parser intended to parse sequences of name/value pairs.
028 *
029 * Parameter values are expected to be enclosed in quotes if they
030 * contain unsafe characters, such as '=' characters or separators.
031 * Parameter values are optional and can be omitted.
032 *
033 * <p>
034 *  <code>param1 = value; param2 = "anything goes; really"; param3</code>
035 * </p>
036 */
037public class ParameterParser {
038
039    /**
040     * String to be parsed.
041     */
042    private char[] chars = null;
043
044    /**
045     * Current position in the string.
046     */
047    private int pos = 0;
048
049    /**
050     * Maximum position in the string.
051     */
052    private int len = 0;
053
054    /**
055     * Start of a token.
056     */
057    private int i1 = 0;
058
059    /**
060     * End of a token.
061     */
062    private int i2 = 0;
063
064    /**
065     * Whether names stored in the map should be converted to lower case.
066     */
067    private boolean lowerCaseNames = false;
068
069    /**
070     * Default ParameterParser constructor.
071     */
072    public ParameterParser() {
073        super();
074    }
075
076    /**
077     * Are there any characters left to parse?
078     *
079     * @return {@code true} if there are unparsed characters,
080     *         {@code false} otherwise.
081     */
082    private boolean hasChar() {
083        return this.pos < this.len;
084    }
085
086    /**
087     * A helper method to process the parsed token. This method removes
088     * leading and trailing blanks as well as enclosing quotation marks,
089     * when necessary.
090     *
091     * @param quoted {@code true} if quotation marks are expected,
092     *               {@code false} otherwise.
093     * @return the token
094     */
095    private String getToken(boolean quoted) {
096        // Trim leading white spaces
097        while ((i1 < i2) && (Character.isWhitespace(chars[i1]))) {
098            i1++;
099        }
100        // Trim trailing white spaces
101        while ((i2 > i1) && (Character.isWhitespace(chars[i2 - 1]))) {
102            i2--;
103        }
104        // Strip away quotation marks if necessary
105        if (quoted
106            && ((i2 - i1) >= 2)
107            && (chars[i1] == '"')
108            && (chars[i2 - 1] == '"')) {
109            i1++;
110            i2--;
111        }
112        String result = null;
113        if (i2 > i1) {
114            result = new String(chars, i1, i2 - i1);
115        }
116        return result;
117    }
118
119    /**
120     * Tests if the given character is present in the array of characters.
121     *
122     * @param ch the character to test for presense in the array of characters
123     * @param charray the array of characters to test against
124     *
125     * @return {@code true} if the character is present in the array of
126     *   characters, {@code false} otherwise.
127     */
128    private boolean isOneOf(char ch, final char[] charray) {
129        boolean result = false;
130        for (char element : charray) {
131            if (ch == element) {
132                result = true;
133                break;
134            }
135        }
136        return result;
137    }
138
139    /**
140     * Parses out a token until any of the given terminators
141     * is encountered.
142     *
143     * @param terminators the array of terminating characters. Any of these
144     * characters when encountered signify the end of the token
145     *
146     * @return the token
147     */
148    private String parseToken(final char[] terminators) {
149        char ch;
150        i1 = pos;
151        i2 = pos;
152        while (hasChar()) {
153            ch = chars[pos];
154            if (isOneOf(ch, terminators)) {
155                break;
156            }
157            i2++;
158            pos++;
159        }
160        return getToken(false);
161    }
162
163    /**
164     * Parses out a token until any of the given terminators
165     * is encountered outside the quotation marks.
166     *
167     * @param terminators the array of terminating characters. Any of these
168     * characters when encountered outside the quotation marks signify the end
169     * of the token
170     *
171     * @return the token
172     */
173    private String parseQuotedToken(final char[] terminators) {
174        char ch;
175        i1 = pos;
176        i2 = pos;
177        boolean quoted = false;
178        boolean charEscaped = false;
179        while (hasChar()) {
180            ch = chars[pos];
181            if (!quoted && isOneOf(ch, terminators)) {
182                break;
183            }
184            if (!charEscaped && ch == '"') {
185                quoted = !quoted;
186            }
187            charEscaped = (!charEscaped && ch == '\\');
188            i2++;
189            pos++;
190
191        }
192        return getToken(true);
193    }
194
195    /**
196     * Returns {@code true} if parameter names are to be converted to lower
197     * case when name/value pairs are parsed.
198     *
199     * @return {@code true} if parameter names are to be
200     * converted to lower case when name/value pairs are parsed.
201     * Otherwise returns {@code false}
202     */
203    public boolean isLowerCaseNames() {
204        return this.lowerCaseNames;
205    }
206
207    /**
208     * Sets the flag if parameter names are to be converted to lower case when
209     * name/value pairs are parsed.
210     *
211     * @param b {@code true} if parameter names are to be
212     * converted to lower case when name/value pairs are parsed.
213     * {@code false} otherwise.
214     */
215    public void setLowerCaseNames(boolean b) {
216        this.lowerCaseNames = b;
217    }
218
219    /**
220     * Extracts a map of name/value pairs from the given string. Names are
221     * expected to be unique. Multiple separators may be specified and
222     * the earliest found in the input string is used.
223     *
224     * @param str the string that contains a sequence of name/value pairs
225     * @param separators the name/value pairs separators
226     *
227     * @return a map of name/value pairs
228     */
229    public Map<String, String> parse(final String str, char[] separators) {
230        if (separators == null || separators.length == 0) {
231            return new HashMap<String, String>();
232        }
233        char separator = separators[0];
234        if (str != null) {
235            int idx = str.length();
236            for (char separator2 : separators) {
237                int tmp = str.indexOf(separator2);
238                if (tmp != -1 && tmp < idx) {
239                    idx = tmp;
240                    separator = separator2;
241                }
242            }
243        }
244        return parse(str, separator);
245    }
246
247    /**
248     * Extracts a map of name/value pairs from the given string. Names are
249     * expected to be unique.
250     *
251     * @param str the string that contains a sequence of name/value pairs
252     * @param separator the name/value pairs separator
253     *
254     * @return a map of name/value pairs
255     */
256    public Map<String, String> parse(final String str, char separator) {
257        if (str == null) {
258            return new HashMap<String, String>();
259        }
260        return parse(str.toCharArray(), separator);
261    }
262
263    /**
264     * Extracts a map of name/value pairs from the given array of
265     * characters. Names are expected to be unique.
266     *
267     * @param charArray the array of characters that contains a sequence of
268     * name/value pairs
269     * @param separator the name/value pairs separator
270     *
271     * @return a map of name/value pairs
272     */
273    public Map<String, String> parse(final char[] charArray, char separator) {
274        if (charArray == null) {
275            return new HashMap<String, String>();
276        }
277        return parse(charArray, 0, charArray.length, separator);
278    }
279
280    /**
281     * Extracts a map of name/value pairs from the given array of
282     * characters. Names are expected to be unique.
283     *
284     * @param charArray the array of characters that contains a sequence of
285     * name/value pairs
286     * @param offset - the initial offset.
287     * @param length - the length.
288     * @param separator the name/value pairs separator
289     *
290     * @return a map of name/value pairs
291     */
292    public Map<String, String> parse(
293        final char[] charArray,
294        int offset,
295        int length,
296        char separator) {
297
298        if (charArray == null) {
299            return new HashMap<String, String>();
300        }
301        HashMap<String, String> params = new HashMap<String, String>();
302        this.chars = charArray;
303        this.pos = offset;
304        this.len = length;
305
306        String paramName = null;
307        String paramValue = null;
308        while (hasChar()) {
309            paramName = parseToken(new char[] {
310                    '=', separator });
311            paramValue = null;
312            if (hasChar() && (charArray[pos] == '=')) {
313                pos++; // skip '='
314                paramValue = parseQuotedToken(new char[] {
315                        separator });
316
317                if (paramValue != null) {
318                    try {
319                        paramValue = MimeUtility.decodeText(paramValue);
320                    } catch (UnsupportedEncodingException e) {
321                        // let's keep the original value in this case
322                    }
323                }
324            }
325            if (hasChar() && (charArray[pos] == separator)) {
326                pos++; // skip separator
327            }
328            if ((paramName != null) && (paramName.length() > 0)) {
329                if (this.lowerCaseNames) {
330                    paramName = paramName.toLowerCase(Locale.ENGLISH);
331                }
332
333                params.put(paramName, paramValue);
334            }
335        }
336        return params;
337    }
338
339}