001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.vfs2.filter;
018
import java.io.Serializable;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.List;
import java.util.Stack;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.vfs2.FileFilter;
import org.apache.commons.vfs2.FileSelectInfo;
029
030/**
031 * Filters files using the supplied wildcards.
032 * <p>
033 * This filter selects files and directories based on one or more wildcards.
034 * Testing is case-sensitive by default, but this can be configured.
035 * </p>
036 * <p>
037 * The wildcard matcher uses the characters '?' and '*' to represent a single or
038 * multiple wildcard characters. This is the same as often found on Dos/Unix
039 * command lines.
040 * </p>
041 * <p>
042 * For example, to retrieve and print all Java files that have the expression
043 * test in the name in the current directory:
044 * </p>
045 *
046 * <pre>
047 * FileSystemManager fsManager = VFS.getManager();
048 * FileObject dir = fsManager.toFileObject(new File(&quot;.&quot;));
049 * FileObject[] files;
050 * files = dir.findFiles(new FileFilterSelector(new WildcardFileFilter(&quot;*test*.java&quot;)));
051 * for (int i = 0; i &lt; files.length; i++) {
052 *     System.out.println(files[i]);
053 * }
054 * </pre>
055 *
056 * @author This code was originally ported from Apache Commons IO File Filter
057 * @see "https://commons.apache.org/proper/commons-io/"
058 * @since 2.4
059 */
060public class WildcardFileFilter implements FileFilter, Serializable {
061
062    private static final long serialVersionUID = 1L;
063
064    /**
065     * Splits a string into a number of tokens. The text is split by '?' and '*'.
066     * Where multiple '*' occur consecutively they are collapsed into a single '*'.
067     *
068     * @param text the text to split
069     * @return the array of tokens, never null
070     */
071    // CHECKSTYLE:OFF Cyclomatic complexity of 12 is OK here
072    static String[] splitOnTokens(final String text) {
073        // used by wildcardMatch
074        // package level so a unit test may run on this
075
076        if (text.indexOf('?') == -1 && text.indexOf('*') == -1) {
077            return new String[] {text};
078        }
079
080        final char[] array = text.toCharArray();
081        final ArrayList<String> list = new ArrayList<>();
082        final StringBuilder builder = new StringBuilder();
083        for (int i = 0; i < array.length; i++) {
084            if (array[i] == '?' || array[i] == '*') {
085                if (StringUtils.isNotEmpty(builder)) {
086                    list.add(builder.toString());
087                    builder.setLength(0);
088                }
089                if (array[i] == '?') {
090                    list.add("?");
091                } else if (list.isEmpty() || i > 0 && !list.get(list.size() - 1).equals("*")) {
092                    list.add("*");
093                }
094            } else {
095                builder.append(array[i]);
096            }
097        }
098        if (StringUtils.isNotEmpty(builder)) {
099            list.add(builder.toString());
100        }
101
102        return list.toArray(ArrayUtils.EMPTY_STRING_ARRAY);
103    }
104
105    /**
106     * Checks a file name to see if it matches the specified wildcard matcher
107     * allowing control over case-sensitivity.
108     * <p>
109     * The wildcard matcher uses the characters '?' and '*' to represent a single or
110     * multiple (zero or more) wildcard characters. N.B. the sequence "*?" does not
111     * work properly at present in match strings.
112     * </p>
113     *
114     * @param fileName        the file name to match on
115     * @param wildcardMatcher the wildcard string to match against
116     * @param caseSensitivity what case sensitivity rule to use, null means
117     *                        case-sensitive
118     *
119     * @return true if the file name matches the wildcard string
120     */
121    // CHECKSTYLE:OFF TODO xxx Cyclomatic complexity of 19 should be refactored
122    static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase caseSensitivity) {
123        if (fileName == null && wildcardMatcher == null) {
124            return true;
125        }
126        if (fileName == null || wildcardMatcher == null) {
127            return false;
128        }
129        if (caseSensitivity == null) {
130            caseSensitivity = IOCase.SENSITIVE;
131        }
132        final String[] wcs = splitOnTokens(wildcardMatcher);
133        boolean anyChars = false;
134        int textIdx = 0;
135        int wcsIdx = 0;
136        final Stack<int[]> backtrack = new Stack<>();
137
138        // loop around a backtrack stack, to handle complex * matching
139        do {
140            if (!backtrack.isEmpty()) {
141                final int[] array = backtrack.pop();
142                wcsIdx = array[0];
143                textIdx = array[1];
144                anyChars = true;
145            }
146
147            // loop whilst tokens and text left to process
148            while (wcsIdx < wcs.length) {
149
150                if (wcs[wcsIdx].equals("?")) {
151                    // ? so move to next text char
152                    textIdx++;
153                    if (textIdx > fileName.length()) {
154                        break;
155                    }
156                    anyChars = false;
157
158                } else if (wcs[wcsIdx].equals("*")) {
159                    // set any chars status
160                    anyChars = true;
161                    if (wcsIdx == wcs.length - 1) {
162                        textIdx = fileName.length();
163                    }
164
165                } else {
166                    // matching text token
167                    if (anyChars) {
168                        // any chars then try to locate text token
169                        textIdx = caseSensitivity.checkIndexOf(fileName, textIdx, wcs[wcsIdx]);
170                        if (textIdx == -1) {
171                            // token not found
172                            break;
173                        }
174                        final int repeat = caseSensitivity.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]);
175                        if (repeat >= 0) {
176                            backtrack.push(new int[] {wcsIdx, repeat});
177                        }
178                    } else if (!caseSensitivity.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) {
179                        // matching from current position
180                        // couldn't match token
181                        break;
182                    }
183
184                    // matched text token, move text index to end of matched
185                    // token
186                    textIdx += wcs[wcsIdx].length();
187                    anyChars = false;
188                }
189
190                wcsIdx++;
191            }
192
193            // full match
194            if (wcsIdx == wcs.length && textIdx == fileName.length()) {
195                return true;
196            }
197
198        } while (!backtrack.isEmpty());
199
200        return false;
201    }
202    // CHECKSTYLE:ON
203
204    /** Whether the comparison is case-sensitive. */
205    private final IOCase caseSensitivity;
206
207    /** The wildcards that will be used to match file names. */
208    private final List<String> wildcards;
209
210    /**
211     * Constructs a new wildcard filter for a list of wildcards specifying
212     * case-sensitivity.
213     *
214     * @param caseSensitivity how to handle case sensitivity, null means
215     *                        case-sensitive
216     * @param wildcards       the list of wildcards to match, not null
217     */
218    public WildcardFileFilter(final IOCase caseSensitivity, final List<String> wildcards) {
219        if (wildcards == null) {
220            throw new IllegalArgumentException("The wildcard list must not be null");
221        }
222        this.wildcards = new ArrayList<>(wildcards);
223        this.caseSensitivity = caseSensitivity == null ? IOCase.SENSITIVE : caseSensitivity;
224    }
225
226    /**
227     * Constructs a new wildcard filter for an array of wildcards specifying
228     * case-sensitivity.
229     *
230     * @param caseSensitivity how to handle case sensitivity, null means
231     *                        case-sensitive
232     * @param wildcards       the array of wildcards to match, not null
233     */
234    public WildcardFileFilter(final IOCase caseSensitivity, final String... wildcards) {
235        if (wildcards == null) {
236            throw new IllegalArgumentException("The wildcard array must not be null");
237        }
238        this.wildcards = new ArrayList<>(Arrays.asList(wildcards));
239        this.caseSensitivity = caseSensitivity == null ? IOCase.SENSITIVE : caseSensitivity;
240    }
241
242    /**
243     * Constructs a new case-sensitive wildcard filter for a list of wildcards.
244     *
245     * @param wildcards the list of wildcards to match, not null
246     */
247    public WildcardFileFilter(final List<String> wildcards) {
248        this(null, wildcards);
249    }
250
251    /**
252     * Constructs a new case-sensitive wildcard filter for an array of wildcards.
253     * <p>
254     * The array is not cloned, so could be changed after constructing the instance.
255     * This would be inadvisable however.
256     *
257     * @param wildcards the array of wildcards to match
258     */
259    public WildcardFileFilter(final String... wildcards) {
260        this(null, wildcards);
261    }
262
263    /**
264     * Checks to see if the file name matches one of the wildcards.
265     *
266     * @param fileSelectInfo the file to check
267     * @return true if the file name matches one of the wildcards
268     */
269    @Override
270    public boolean accept(final FileSelectInfo fileSelectInfo) {
271        final String name = fileSelectInfo.getFile().getName().getBaseName();
272        return wildcards.stream().anyMatch(wildcard -> wildcardMatch(name, wildcard, caseSensitivity));
273    }
274
275    // CHECKSTYLE:ON
276
277    /**
278     * Provide a String representation of this file filter.
279     *
280     * @return a String representation
281     */
282    @Override
283    public String toString() {
284        final StringBuilder buffer = new StringBuilder();
285        buffer.append(super.toString());
286        buffer.append("(");
287        if (wildcards != null) {
288            buffer.append(String.join(",", wildcards));
289        }
290        buffer.append(")");
291        return buffer.toString();
292    }
293
294}