001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io; 018 019import java.io.File; 020import java.util.ArrayDeque; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.Collection; 024import java.util.Deque; 025import java.util.List; 026import java.util.Objects; 027import java.util.regex.Matcher; 028import java.util.regex.Pattern; 029 030/** 031 * General file name and file path manipulation utilities. 032 * <p> 033 * When dealing with file names you can hit problems when moving from a Windows 034 * based development machine to a Unix based production machine. 035 * This class aims to help avoid those problems. 036 * <p> 037 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by 038 * using JDK {@link java.io.File File} objects and the two argument constructor 039 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}. 040 * <p> 041 * Most methods on this class are designed to work the same on both Unix and Windows. 042 * Those that don't include 'System', 'Unix' or 'Windows' in their name. 
043 * <p> 044 * Most methods recognize both separators (forward and back), and both 045 * sets of prefixes. See the Javadoc of each method for details. 046 * <p> 047 * This class defines six components within a file name 048 * (example C:\dev\project\file.txt): 049 * <ul> 050 * <li>the prefix - C:\</li> 051 * <li>the path - dev\project\</li> 052 * <li>the full path - C:\dev\project\</li> 053 * <li>the name - file.txt</li> 054 * <li>the base name - file</li> 055 * <li>the extension - txt</li> 056 * </ul> 057 * Note that this class works best if directory file names end with a separator. 058 * If you omit the last separator, it is impossible to determine if the file name 059 * corresponds to a file or a directory. As a result, we have chosen to say 060 * it corresponds to a file. 061 * <p> 062 * This class only supports Unix and Windows style names. 063 * Prefixes are matched as follows: 064 * <pre> 065 * Windows: 066 * a\b\c.txt --> "" --> relative 067 * \a\b\c.txt --> "\" --> current drive absolute 068 * C:a\b\c.txt --> "C:" --> drive relative 069 * C:\a\b\c.txt --> "C:\" --> absolute 070 * \\server\a\b\c.txt --> "\\server\" --> UNC 071 * 072 * Unix: 073 * a/b/c.txt --> "" --> relative 074 * /a/b/c.txt --> "/" --> absolute 075 * ~/a/b/c.txt --> "~/" --> current user 076 * ~ --> "~/" --> current user (slash added) 077 * ~user/a/b/c.txt --> "~user/" --> named user 078 * ~user --> "~user/" --> named user (slash added) 079 * </pre> 080 * Both prefix styles are matched always, irrespective of the machine that you are 081 * currently running on. 082 * <p> 083 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils. 084 * 085 * @since 1.1 086 */ 087public class FilenameUtils { 088 089 private static final String[] EMPTY_STRING_ARRAY = {}; 090 091 private static final String EMPTY_STRING = ""; 092 093 private static final int NOT_FOUND = -1; 094 095 /** 096 * The extension separator character. 
097 * @since 1.4 098 */ 099 public static final char EXTENSION_SEPARATOR = '.'; 100 101 /** 102 * The extension separator String. 103 * @since 1.4 104 */ 105 public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR); 106 107 /** 108 * The Unix separator character. 109 */ 110 private static final char UNIX_SEPARATOR = '/'; 111 112 /** 113 * The Windows separator character. 114 */ 115 private static final char WINDOWS_SEPARATOR = '\\'; 116 117 /** 118 * The system separator character. 119 */ 120 private static final char SYSTEM_SEPARATOR = File.separatorChar; 121 122 /** 123 * The separator character that is the opposite of the system separator. 124 */ 125 private static final char OTHER_SEPARATOR; 126 static { 127 if (isSystemWindows()) { 128 OTHER_SEPARATOR = UNIX_SEPARATOR; 129 } else { 130 OTHER_SEPARATOR = WINDOWS_SEPARATOR; 131 } 132 } 133 134 /** 135 * Instances should NOT be constructed in standard programming. 136 */ 137 public FilenameUtils() { 138 } 139 140 /** 141 * Determines if Windows file system is in use. 142 * 143 * @return true if the system is Windows 144 */ 145 static boolean isSystemWindows() { 146 return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR; 147 } 148 149 /** 150 * Checks if the character is a separator. 151 * 152 * @param ch the character to check 153 * @return true if it is a separator character 154 */ 155 private static boolean isSeparator(final char ch) { 156 return ch == UNIX_SEPARATOR || ch == WINDOWS_SEPARATOR; 157 } 158 159 /** 160 * Normalizes a path, removing double and single dot path steps. 161 * <p> 162 * This method normalizes a path to a standard format. 163 * The input may contain separators in either Unix or Windows format. 164 * The output will contain separators in the format of the system. 165 * <p> 166 * A trailing slash will be retained. 167 * A double slash will be merged to a single slash (but UNC names are handled). 168 * A single dot path segment will be removed. 
169 * A double dot will cause that path segment and the one before to be removed. 170 * If the double dot has no parent path segment to work with, {@code null} 171 * is returned. 172 * <p> 173 * The output will be the same on both Unix and Windows except 174 * for the separator character. 175 * <pre> 176 * /foo// --> /foo/ 177 * /foo/./ --> /foo/ 178 * /foo/../bar --> /bar 179 * /foo/../bar/ --> /bar/ 180 * /foo/../bar/../baz --> /baz 181 * //foo//./bar --> /foo/bar 182 * /../ --> null 183 * ../foo --> null 184 * foo/bar/.. --> foo/ 185 * foo/../../bar --> null 186 * foo/../bar --> bar 187 * //server/foo/../bar --> //server/bar 188 * //server/../bar --> null 189 * C:\foo\..\bar --> C:\bar 190 * C:\..\bar --> null 191 * ~/foo/../bar/ --> ~/bar/ 192 * ~/../bar --> null 193 * </pre> 194 * (Note the file separator returned will be correct for Windows/Unix) 195 * 196 * @param fileName the fileName to normalize, null returns null 197 * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed 198 */ 199 public static String normalize(final String fileName) { 200 return doNormalize(fileName, SYSTEM_SEPARATOR, true); 201 } 202 /** 203 * Normalizes a path, removing double and single dot path steps. 204 * <p> 205 * This method normalizes a path to a standard format. 206 * The input may contain separators in either Unix or Windows format. 207 * The output will contain separators in the format specified. 208 * <p> 209 * A trailing slash will be retained. 210 * A double slash will be merged to a single slash (but UNC names are handled). 211 * A single dot path segment will be removed. 212 * A double dot will cause that path segment and the one before to be removed. 213 * If the double dot has no parent path segment to work with, {@code null} 214 * is returned. 215 * <p> 216 * The output will be the same on both Unix and Windows except 217 * for the separator character. 
218 * <pre> 219 * /foo// --> /foo/ 220 * /foo/./ --> /foo/ 221 * /foo/../bar --> /bar 222 * /foo/../bar/ --> /bar/ 223 * /foo/../bar/../baz --> /baz 224 * //foo//./bar --> /foo/bar 225 * /../ --> null 226 * ../foo --> null 227 * foo/bar/.. --> foo/ 228 * foo/../../bar --> null 229 * foo/../bar --> bar 230 * //server/foo/../bar --> //server/bar 231 * //server/../bar --> null 232 * C:\foo\..\bar --> C:\bar 233 * C:\..\bar --> null 234 * ~/foo/../bar/ --> ~/bar/ 235 * ~/../bar --> null 236 * </pre> 237 * The output will be the same on both Unix and Windows including 238 * the separator character. 239 * 240 * @param fileName the fileName to normalize, null returns null 241 * @param unixSeparator {@code true} if a unix separator should 242 * be used or {@code false} if a windows separator should be used. 243 * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed 244 * @since 2.0 245 */ 246 public static String normalize(final String fileName, final boolean unixSeparator) { 247 final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR; 248 return doNormalize(fileName, separator, true); 249 } 250 251 /** 252 * Normalizes a path, removing double and single dot path steps, 253 * and removing any final directory separator. 254 * <p> 255 * This method normalizes a path to a standard format. 256 * The input may contain separators in either Unix or Windows format. 257 * The output will contain separators in the format of the system. 258 * <p> 259 * A trailing slash will be removed. 260 * A double slash will be merged to a single slash (but UNC names are handled). 261 * A single dot path segment will be removed. 262 * A double dot will cause that path segment and the one before to be removed. 263 * If the double dot has no parent path segment to work with, {@code null} 264 * is returned. 265 * <p> 266 * The output will be the same on both Unix and Windows except 267 * for the separator character. 
268 * <pre> 269 * /foo// --> /foo 270 * /foo/./ --> /foo 271 * /foo/../bar --> /bar 272 * /foo/../bar/ --> /bar 273 * /foo/../bar/../baz --> /baz 274 * //foo//./bar --> /foo/bar 275 * /../ --> null 276 * ../foo --> null 277 * foo/bar/.. --> foo 278 * foo/../../bar --> null 279 * foo/../bar --> bar 280 * //server/foo/../bar --> //server/bar 281 * //server/../bar --> null 282 * C:\foo\..\bar --> C:\bar 283 * C:\..\bar --> null 284 * ~/foo/../bar/ --> ~/bar 285 * ~/../bar --> null 286 * </pre> 287 * (Note the file separator returned will be correct for Windows/Unix) 288 * 289 * @param fileName the fileName to normalize, null returns null 290 * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed 291 */ 292 public static String normalizeNoEndSeparator(final String fileName) { 293 return doNormalize(fileName, SYSTEM_SEPARATOR, false); 294 } 295 296 /** 297 * Normalizes a path, removing double and single dot path steps, 298 * and removing any final directory separator. 299 * <p> 300 * This method normalizes a path to a standard format. 301 * The input may contain separators in either Unix or Windows format. 302 * The output will contain separators in the format specified. 303 * <p> 304 * A trailing slash will be removed. 305 * A double slash will be merged to a single slash (but UNC names are handled). 306 * A single dot path segment will be removed. 307 * A double dot will cause that path segment and the one before to be removed. 308 * If the double dot has no parent path segment to work with, {@code null} 309 * is returned. 310 * <p> 311 * The output will be the same on both Unix and Windows including 312 * the separator character. 313 * <pre> 314 * /foo// --> /foo 315 * /foo/./ --> /foo 316 * /foo/../bar --> /bar 317 * /foo/../bar/ --> /bar 318 * /foo/../bar/../baz --> /baz 319 * //foo//./bar --> /foo/bar 320 * /../ --> null 321 * ../foo --> null 322 * foo/bar/.. 
     *                      -->   foo
     * foo/../../bar        -->   null
     * foo/../bar           -->   bar
     * //server/foo/../bar  -->   //server/bar
     * //server/../bar      -->   null
     * C:\foo\..\bar        -->   C:\bar
     * C:\..\bar            -->   null
     * ~/foo/../bar/        -->   ~/bar
     * ~/../bar             -->   null
     * </pre>
     *
     * @param fileName  the fileName to normalize, null returns null
     * @param unixSeparator {@code true} if a unix separator should
     * be used or {@code false} if a windows separator should be used.
     * @return the normalized fileName, or null if invalid
     * @throws IllegalArgumentException if {@code fileName} contains a null character (U+0000)
     * @since 2.0
     */
    public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) {
        final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR;
        return doNormalize(fileName, separator, false);
    }

    /**
     * Internal method to perform the normalization.
     * <p>
     * The file name is copied into a scratch {@code char[]} and compacted in place
     * in four passes: unify separators, collapse adjoining separators, drop
     * {@code ./} segments, then resolve {@code ../} segments. {@code size} tracks
     * the number of live characters in the scratch array throughout.
     *
     * @param fileName  the fileName, null returns null, the empty string is returned as is
     * @param separator  The separator character to use
     * @param keepSeparator  true to keep the final separator
     * @return the normalized fileName, or null if the prefix is invalid or a
     *  {@code ..} segment has no parent segment to remove
     * @throws IllegalArgumentException if {@code fileName} contains a null character
     *  (raised by {@code requireNonNullChars}; null characters are rejected, not removed)
     */
    private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) {
        if (fileName == null) {
            return null;
        }

        requireNonNullChars(fileName);

        int size = fileName.length();
        if (size == 0) {
            return fileName;
        }
        final int prefix = getPrefixLength(fileName);
        if (prefix < 0) {
            return null;
        }

        final char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
        fileName.getChars(0, fileName.length(), array, 0);

        // fix separators throughout
        // otherSeparator is whichever of the system/other pair is not the requested one
        // (assumes separator is '/' or '\\', as passed by the callers visible in this file)
        final char otherSeparator = separator == SYSTEM_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_SEPARATOR;
        for (int i = 0; i < array.length; i++) {
            if (array[i] == otherSeparator) {
                array[i] = separator;
            }
        }

        // add extra separator on the end to simplify code below
        // lastIsDirectory remembers whether the input itself ended with a separator
        boolean lastIsDirectory = true;
        if (array[size - 1] != separator) {
            array[size++] = separator;
            lastIsDirectory = false;
        }

        // adjoining slashes
        // If we get here, prefix can only be 0 or greater, size 1 or greater
        // If prefix is 0, set loop start to 1 to prevent index errors
        for (int i = (prefix != 0) ? prefix : 1; i < size; i++) {
            if (array[i] == separator && array[i - 1] == separator) {
                // shift the tail left by one over the duplicate, then re-examine this index
                System.arraycopy(array, i, array, i - 1, size - i);
                size--;
                i--;
            }
        }

        // dot slash
        for (int i = prefix + 1; i < size; i++) {
            if (array[i] == separator && array[i - 1] == '.' &&
                    (i == prefix + 1 || array[i - 2] == separator)) {
                if (i == size - 1) {
                    // a trailing "." segment means the result denotes a directory
                    lastIsDirectory = true;
                }
                // drop the two characters "./" by shifting the tail left
                System.arraycopy(array, i + 1, array, i - 1, size - i);
                size -=2;
                i--;
            }
        }

        // double dot slash
        outer:
        for (int i = prefix + 2; i < size; i++) {
            if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
                    (i == prefix + 2 || array[i - 3] == separator)) {
                if (i == prefix + 2) {
                    // ".." directly after the prefix: there is no parent segment to remove
                    return null;
                }
                if (i == size - 1) {
                    // a trailing ".." segment means the result denotes a directory
                    lastIsDirectory = true;
                }
                int j;
                // scan backwards for the separator that starts the parent segment
                for (j = i - 4 ; j >= prefix; j--) {
                    if (array[j] == separator) {
                        // remove b/../ from a/b/../c
                        System.arraycopy(array, i + 1, array, j + 1, size - i);
                        size -= i - j;
                        i = j + 1;
                        continue outer;
                    }
                }
                // remove a/../ from a/../c
                System.arraycopy(array, i + 1, array, prefix, size - i);
                size -= i + 1 - prefix;
                i = prefix + 1;
            }
        }

        if (size <= 0) {  // should never be less than 0
            return EMPTY_STRING;
        }
        if (size <= prefix) {  // should never be less than prefix
            return new String(array, 0, size);
        }
        if (lastIsDirectory && keepSeparator) {
            return new String(array, 0, size);  // keep trailing separator
        }
        return new String(array, 0, size - 1);  // lose trailing separator
    }

    /**
     * Concatenates a fileName to a base path using normal command line style rules.
     * <p>
     * The effect is equivalent to resultant directory after changing
     * directory to the first argument, followed by changing directory to
     * the second argument.
     * <p>
     * The first argument is the base path, the second is the path to concatenate.
     * The returned path is always normalized via {@link #normalize(String)},
     * thus {@code ..} is handled.
     * <p>
     * If {@code fullFileNameToAdd} is absolute (has an absolute prefix), then
     * it will be normalized and returned.
     * Otherwise, the paths will be joined, normalized and returned.
     * <p>
     * The output will be the same on both Unix and Windows except
     * for the separator character.
467 * <pre> 468 * /foo/ + bar --> /foo/bar 469 * /foo + bar --> /foo/bar 470 * /foo + /bar --> /bar 471 * /foo + C:/bar --> C:/bar 472 * /foo + C:bar --> C:bar (*) 473 * /foo/a/ + ../bar --> /foo/bar 474 * /foo/ + ../../bar --> null 475 * /foo/ + /bar --> /bar 476 * /foo/.. + /bar --> /bar 477 * /foo + bar/c.txt --> /foo/bar/c.txt 478 * /foo/c.txt + bar --> /foo/c.txt/bar (!) 479 * </pre> 480 * (*) Note that the Windows relative drive prefix is unreliable when 481 * used with this method. 482 * (!) Note that the first parameter must be a path. If it ends with a name, then 483 * the name will be built into the concatenated path. If this might be a problem, 484 * use {@link #getFullPath(String)} on the base path argument. 485 * 486 * @param basePath the base path to attach to, always treated as a path 487 * @param fullFileNameToAdd the fileName (or path) to attach to the base 488 * @return the concatenated path, or null if invalid. Null bytes inside string will be removed 489 */ 490 public static String concat(final String basePath, final String fullFileNameToAdd) { 491 final int prefix = getPrefixLength(fullFileNameToAdd); 492 if (prefix < 0) { 493 return null; 494 } 495 if (prefix > 0) { 496 return normalize(fullFileNameToAdd); 497 } 498 if (basePath == null) { 499 return null; 500 } 501 final int len = basePath.length(); 502 if (len == 0) { 503 return normalize(fullFileNameToAdd); 504 } 505 final char ch = basePath.charAt(len - 1); 506 if (isSeparator(ch)) { 507 return normalize(basePath + fullFileNameToAdd); 508 } 509 return normalize(basePath + '/' + fullFileNameToAdd); 510 } 511 512 /** 513 * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory). 514 * <p> 515 * The files names are expected to be normalized. 
516 * </p> 517 * 518 * Edge cases: 519 * <ul> 520 * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li> 521 * <li>A directory does not contain itself: return false</li> 522 * <li>A null child file is not contained in any parent: return false</li> 523 * </ul> 524 * 525 * @param canonicalParent 526 * the file to consider as the parent. 527 * @param canonicalChild 528 * the file to consider as the child. 529 * @return true is the candidate leaf is under by the specified composite. False otherwise. 530 * @since 2.2 531 * @see FileUtils#directoryContains(File, File) 532 */ 533 public static boolean directoryContains(final String canonicalParent, final String canonicalChild) { 534 Objects.requireNonNull(canonicalParent, "canonicalParent"); 535 536 if (canonicalChild == null) { 537 return false; 538 } 539 540 if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) { 541 return false; 542 } 543 544 return IOCase.SYSTEM.checkStartsWith(canonicalChild, canonicalParent); 545 } 546 547 /** 548 * Converts all separators to the Unix separator of forward slash. 549 * 550 * @param path the path to be changed, null ignored 551 * @return the updated path 552 */ 553 public static String separatorsToUnix(final String path) { 554 if (path == null || path.indexOf(WINDOWS_SEPARATOR) == NOT_FOUND) { 555 return path; 556 } 557 return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR); 558 } 559 560 /** 561 * Converts all separators to the Windows separator of backslash. 562 * 563 * @param path the path to be changed, null ignored 564 * @return the updated path 565 */ 566 public static String separatorsToWindows(final String path) { 567 if (path == null || path.indexOf(UNIX_SEPARATOR) == NOT_FOUND) { 568 return path; 569 } 570 return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR); 571 } 572 573 /** 574 * Converts all separators to the system separator. 
575 * 576 * @param path the path to be changed, null ignored 577 * @return the updated path 578 */ 579 public static String separatorsToSystem(final String path) { 580 if (path == null) { 581 return null; 582 } 583 return isSystemWindows() ? separatorsToWindows(path) : separatorsToUnix(path); 584 } 585 586 /** 587 * Returns the length of the fileName prefix, such as {@code C:/} or {@code ~/}. 588 * <p> 589 * This method will handle a file in either Unix or Windows format. 590 * <p> 591 * The prefix length includes the first slash in the full fileName 592 * if applicable. Thus, it is possible that the length returned is greater 593 * than the length of the input string. 594 * <pre> 595 * Windows: 596 * a\b\c.txt --> 0 --> relative 597 * \a\b\c.txt --> 1 --> current drive absolute 598 * C:a\b\c.txt --> 2 --> drive relative 599 * C:\a\b\c.txt --> 3 --> absolute 600 * \\server\a\b\c.txt --> 9 --> UNC 601 * \\\a\b\c.txt --> -1 --> error 602 * 603 * Unix: 604 * a/b/c.txt --> 0 --> relative 605 * /a/b/c.txt --> 1 --> absolute 606 * ~/a/b/c.txt --> 2 --> current user 607 * ~ --> 2 --> current user (slash added) 608 * ~user/a/b/c.txt --> 6 --> named user 609 * ~user --> 6 --> named user (slash added) 610 * //server/a/b/c.txt --> 9 611 * ///a/b/c.txt --> -1 --> error 612 * C: --> 0 --> valid filename as only null byte and / are reserved characters 613 * </pre> 614 * <p> 615 * The output will be the same irrespective of the machine that the code is running on. 616 * ie. both Unix and Windows prefixes are matched regardless. 617 * 618 * Note that a leading // (or \\) is used to indicate a UNC name on Windows. 619 * These must be followed by a server name, so double-slashes are not collapsed 620 * to a single slash at the start of the fileName. 
621 * 622 * @param fileName the fileName to find the prefix in, null returns -1 623 * @return the length of the prefix, -1 if invalid or null 624 */ 625 public static int getPrefixLength(final String fileName) { 626 if (fileName == null) { 627 return NOT_FOUND; 628 } 629 final int len = fileName.length(); 630 if (len == 0) { 631 return 0; 632 } 633 char ch0 = fileName.charAt(0); 634 if (ch0 == ':') { 635 return NOT_FOUND; 636 } 637 if (len == 1) { 638 if (ch0 == '~') { 639 return 2; // return a length greater than the input 640 } 641 return isSeparator(ch0) ? 1 : 0; 642 } 643 if (ch0 == '~') { 644 int posUnix = fileName.indexOf(UNIX_SEPARATOR, 1); 645 int posWin = fileName.indexOf(WINDOWS_SEPARATOR, 1); 646 if (posUnix == NOT_FOUND && posWin == NOT_FOUND) { 647 return len + 1; // return a length greater than the input 648 } 649 posUnix = posUnix == NOT_FOUND ? posWin : posUnix; 650 posWin = posWin == NOT_FOUND ? posUnix : posWin; 651 return Math.min(posUnix, posWin) + 1; 652 } 653 final char ch1 = fileName.charAt(1); 654 if (ch1 == ':') { 655 ch0 = Character.toUpperCase(ch0); 656 if (ch0 >= 'A' && ch0 <= 'Z') { 657 if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) { 658 return 0; 659 } 660 if (len == 2 || !isSeparator(fileName.charAt(2))) { 661 return 2; 662 } 663 return 3; 664 } 665 if (ch0 == UNIX_SEPARATOR) { 666 return 1; 667 } 668 return NOT_FOUND; 669 670 } 671 if (!isSeparator(ch0) || !isSeparator(ch1)) { 672 return isSeparator(ch0) ? 1 : 0; 673 } 674 int posUnix = fileName.indexOf(UNIX_SEPARATOR, 2); 675 int posWin = fileName.indexOf(WINDOWS_SEPARATOR, 2); 676 if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) { 677 return NOT_FOUND; 678 } 679 posUnix = posUnix == NOT_FOUND ? posWin : posUnix; 680 posWin = posWin == NOT_FOUND ? posUnix : posWin; 681 final int pos = Math.min(posUnix, posWin) + 1; 682 final String hostnamePart = fileName.substring(2, pos - 1); 683 return isValidHostName(hostnamePart) ? 
pos : NOT_FOUND; 684 } 685 686 /** 687 * Returns the index of the last directory separator character. 688 * <p> 689 * This method will handle a file in either Unix or Windows format. 690 * The position of the last forward or backslash is returned. 691 * <p> 692 * The output will be the same irrespective of the machine that the code is running on. 693 * 694 * @param fileName the fileName to find the last path separator in, null returns -1 695 * @return the index of the last separator character, or -1 if there 696 * is no such character 697 */ 698 public static int indexOfLastSeparator(final String fileName) { 699 if (fileName == null) { 700 return NOT_FOUND; 701 } 702 final int lastUnixPos = fileName.lastIndexOf(UNIX_SEPARATOR); 703 final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_SEPARATOR); 704 return Math.max(lastUnixPos, lastWindowsPos); 705 } 706 707 /** 708 * Returns the index of the last extension separator character, which is a dot. 709 * <p> 710 * This method also checks that there is no directory separator after the last dot. To do this it uses 711 * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format. 712 * </p> 713 * <p> 714 * The output will be the same irrespective of the machine that the code is running on, with the 715 * exception of a possible {@link IllegalArgumentException} on Windows (see below). 716 * </p> 717 * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt". 718 * In this case, the name wouldn't be the name of a file, but the identifier of an 719 * alternate data stream (bar.txt) on the file foo.exe. The method used to return 720 * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing 721 * an {@link IllegalArgumentException} for names like this. 
722 * 723 * @param fileName 724 * the fileName to find the last extension separator in, null returns -1 725 * @return the index of the last extension separator character, or -1 if there is no such character 726 * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact, 727 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". 728 */ 729 public static int indexOfExtension(final String fileName) throws IllegalArgumentException { 730 if (fileName == null) { 731 return NOT_FOUND; 732 } 733 if (isSystemWindows()) { 734 // Special handling for NTFS ADS: Don't accept colon in the fileName. 735 final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName)); 736 if (offset != -1) { 737 throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden."); 738 } 739 } 740 final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR); 741 final int lastSeparator = indexOfLastSeparator(fileName); 742 return lastSeparator > extensionPos ? NOT_FOUND : extensionPos; 743 } 744 745 /** 746 * Gets the prefix from a full fileName, such as {@code C:/} 747 * or {@code ~/}. 748 * <p> 749 * This method will handle a file in either Unix or Windows format. 750 * The prefix includes the first slash in the full fileName where applicable. 751 * <pre> 752 * Windows: 753 * a\b\c.txt --> "" --> relative 754 * \a\b\c.txt --> "\" --> current drive absolute 755 * C:a\b\c.txt --> "C:" --> drive relative 756 * C:\a\b\c.txt --> "C:\" --> absolute 757 * \\server\a\b\c.txt --> "\\server\" --> UNC 758 * 759 * Unix: 760 * a/b/c.txt --> "" --> relative 761 * /a/b/c.txt --> "/" --> absolute 762 * ~/a/b/c.txt --> "~/" --> current user 763 * ~ --> "~/" --> current user (slash added) 764 * ~user/a/b/c.txt --> "~user/" --> named user 765 * ~user --> "~user/" --> named user (slash added) 766 * </pre> 767 * <p> 768 * The output will be the same irrespective of the machine that the code is running on. 769 * ie. 
both Unix and Windows prefixes are matched regardless. 770 * 771 * @param fileName the fileName to query, null returns null 772 * @return the prefix of the file, null if invalid. Null bytes inside string will be removed 773 */ 774 public static String getPrefix(final String fileName) { 775 if (fileName == null) { 776 return null; 777 } 778 final int len = getPrefixLength(fileName); 779 if (len < 0) { 780 return null; 781 } 782 if (len > fileName.length()) { 783 requireNonNullChars(fileName + UNIX_SEPARATOR); 784 return fileName + UNIX_SEPARATOR; 785 } 786 final String path = fileName.substring(0, len); 787 requireNonNullChars(path); 788 return path; 789 } 790 791 /** 792 * Gets the path from a full fileName, which excludes the prefix. 793 * <p> 794 * This method will handle a file in either Unix or Windows format. 795 * The method is entirely text based, and returns the text before and 796 * including the last forward or backslash. 797 * <pre> 798 * C:\a\b\c.txt --> a\b\ 799 * ~/a/b/c.txt --> a/b/ 800 * a.txt --> "" 801 * a/b/c --> a/b/ 802 * a/b/c/ --> a/b/c/ 803 * </pre> 804 * <p> 805 * The output will be the same irrespective of the machine that the code is running on. 806 * <p> 807 * This method drops the prefix from the result. 808 * See {@link #getFullPath(String)} for the method that retains the prefix. 809 * 810 * @param fileName the fileName to query, null returns null 811 * @return the path of the file, an empty string if none exists, null if invalid. 812 * Null bytes inside string will be removed 813 */ 814 public static String getPath(final String fileName) { 815 return doGetPath(fileName, 1); 816 } 817 818 /** 819 * Gets the path from a full fileName, which excludes the prefix, and 820 * also excluding the final directory separator. 821 * <p> 822 * This method will handle a file in either Unix or Windows format. 823 * The method is entirely text based, and returns the text before the 824 * last forward or backslash. 
825 * <pre> 826 * C:\a\b\c.txt --> a\b 827 * ~/a/b/c.txt --> a/b 828 * a.txt --> "" 829 * a/b/c --> a/b 830 * a/b/c/ --> a/b/c 831 * </pre> 832 * <p> 833 * The output will be the same irrespective of the machine that the code is running on. 834 * <p> 835 * This method drops the prefix from the result. 836 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix. 837 * 838 * @param fileName the fileName to query, null returns null 839 * @return the path of the file, an empty string if none exists, null if invalid. 840 * Null bytes inside string will be removed 841 */ 842 public static String getPathNoEndSeparator(final String fileName) { 843 return doGetPath(fileName, 0); 844 } 845 846 /** 847 * Does the work of getting the path. 848 * 849 * @param fileName the fileName 850 * @param separatorAdd 0 to omit the end separator, 1 to return it 851 * @return the path. Null bytes inside string will be removed 852 */ 853 private static String doGetPath(final String fileName, final int separatorAdd) { 854 if (fileName == null) { 855 return null; 856 } 857 final int prefix = getPrefixLength(fileName); 858 if (prefix < 0) { 859 return null; 860 } 861 final int index = indexOfLastSeparator(fileName); 862 final int endIndex = index+separatorAdd; 863 if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) { 864 return EMPTY_STRING; 865 } 866 final String path = fileName.substring(prefix, endIndex); 867 requireNonNullChars(path); 868 return path; 869 } 870 871 /** 872 * Gets the full path from a full fileName, which is the prefix + path. 873 * <p> 874 * This method will handle a file in either Unix or Windows format. 875 * The method is entirely text based, and returns the text before and 876 * including the last forward or backslash. 
877 * <pre> 878 * C:\a\b\c.txt --> C:\a\b\ 879 * ~/a/b/c.txt --> ~/a/b/ 880 * a.txt --> "" 881 * a/b/c --> a/b/ 882 * a/b/c/ --> a/b/c/ 883 * C: --> C: 884 * C:\ --> C:\ 885 * ~ --> ~/ 886 * ~/ --> ~/ 887 * ~user --> ~user/ 888 * ~user/ --> ~user/ 889 * </pre> 890 * <p> 891 * The output will be the same irrespective of the machine that the code is running on. 892 * 893 * @param fileName the fileName to query, null returns null 894 * @return the path of the file, an empty string if none exists, null if invalid 895 */ 896 public static String getFullPath(final String fileName) { 897 return doGetFullPath(fileName, true); 898 } 899 900 /** 901 * Gets the full path from a full fileName, which is the prefix + path, 902 * and also excluding the final directory separator. 903 * <p> 904 * This method will handle a file in either Unix or Windows format. 905 * The method is entirely text based, and returns the text before the 906 * last forward or backslash. 907 * <pre> 908 * C:\a\b\c.txt --> C:\a\b 909 * ~/a/b/c.txt --> ~/a/b 910 * a.txt --> "" 911 * a/b/c --> a/b 912 * a/b/c/ --> a/b/c 913 * C: --> C: 914 * C:\ --> C:\ 915 * ~ --> ~ 916 * ~/ --> ~ 917 * ~user --> ~user 918 * ~user/ --> ~user 919 * </pre> 920 * <p> 921 * The output will be the same irrespective of the machine that the code is running on. 922 * 923 * @param fileName the fileName to query, null returns null 924 * @return the path of the file, an empty string if none exists, null if invalid 925 */ 926 public static String getFullPathNoEndSeparator(final String fileName) { 927 return doGetFullPath(fileName, false); 928 } 929 930 /** 931 * Does the work of getting the path. 
932 * 933 * @param fileName the fileName 934 * @param includeSeparator true to include the end separator 935 * @return the path 936 */ 937 private static String doGetFullPath(final String fileName, final boolean includeSeparator) { 938 if (fileName == null) { 939 return null; 940 } 941 final int prefix = getPrefixLength(fileName); 942 if (prefix < 0) { 943 return null; 944 } 945 if (prefix >= fileName.length()) { 946 if (includeSeparator) { 947 return getPrefix(fileName); // add end slash if necessary 948 } 949 return fileName; 950 } 951 final int index = indexOfLastSeparator(fileName); 952 if (index < 0) { 953 return fileName.substring(0, prefix); 954 } 955 int end = index + (includeSeparator ? 1 : 0); 956 if (end == 0) { 957 end++; 958 } 959 return fileName.substring(0, end); 960 } 961 962 /** 963 * Gets the name minus the path from a full fileName. 964 * <p> 965 * This method will handle a file in either Unix or Windows format. 966 * The text after the last forward or backslash is returned. 967 * <pre> 968 * a/b/c.txt --> c.txt 969 * a.txt --> a.txt 970 * a/b/c --> c 971 * a/b/c/ --> "" 972 * </pre> 973 * <p> 974 * The output will be the same irrespective of the machine that the code is running on. 975 * 976 * @param fileName the fileName to query, null returns null 977 * @return the name of the file without the path, or an empty string if none exists. 978 * Null bytes inside string will be removed 979 */ 980 public static String getName(final String fileName) { 981 if (fileName == null) { 982 return null; 983 } 984 requireNonNullChars(fileName); 985 final int index = indexOfLastSeparator(fileName); 986 return fileName.substring(index + 1); 987 } 988 989 /** 990 * Checks the input for null bytes, a sign of unsanitized data being passed to to file level functions. 991 * 992 * This may be used for poison byte attacks. 
993 * 994 * @param path the path to check 995 */ 996 private static void requireNonNullChars(final String path) { 997 if (path.indexOf(0) >= 0) { 998 throw new IllegalArgumentException("Null byte present in file/path name. There are no " 999 + "known legitimate use cases for such data, but several injection attacks may use it"); 1000 } 1001 } 1002 1003 /** 1004 * Gets the base name, minus the full path and extension, from a full fileName. 1005 * <p> 1006 * This method will handle a file in either Unix or Windows format. 1007 * The text after the last forward or backslash and before the last dot is returned. 1008 * <pre> 1009 * a/b/c.txt --> c 1010 * a.txt --> a 1011 * a/b/c --> c 1012 * a/b/c/ --> "" 1013 * </pre> 1014 * <p> 1015 * The output will be the same irrespective of the machine that the code is running on. 1016 * 1017 * @param fileName the fileName to query, null returns null 1018 * @return the name of the file without the path, or an empty string if none exists. Null bytes inside string 1019 * will be removed 1020 */ 1021 public static String getBaseName(final String fileName) { 1022 return removeExtension(getName(fileName)); 1023 } 1024 1025 /** 1026 * Gets the extension of a fileName. 1027 * <p> 1028 * This method returns the textual part of the fileName after the last dot. 1029 * There must be no directory separator after the dot. 1030 * <pre> 1031 * foo.txt --> "txt" 1032 * a/b/c.jpg --> "jpg" 1033 * a/b.txt/c --> "" 1034 * a/b/c --> "" 1035 * </pre> 1036 * <p> 1037 * The output will be the same irrespective of the machine that the code is running on, with the 1038 * exception of a possible {@link IllegalArgumentException} on Windows (see below). 1039 * </p> 1040 * <p> 1041 * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt". 1042 * In this case, the name wouldn't be the name of a file, but the identifier of an 1043 * alternate data stream (bar.txt) on the file foo.exe. 
The method used to return 1044 * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing 1045 * an {@link IllegalArgumentException} for names like this. 1046 * 1047 * @param fileName the fileName to retrieve the extension of. 1048 * @return the extension of the file or an empty string if none exists or {@code null} 1049 * if the fileName is {@code null}. 1050 * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact, 1051 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". 1052 */ 1053 public static String getExtension(final String fileName) throws IllegalArgumentException { 1054 if (fileName == null) { 1055 return null; 1056 } 1057 final int index = indexOfExtension(fileName); 1058 if (index == NOT_FOUND) { 1059 return EMPTY_STRING; 1060 } 1061 return fileName.substring(index + 1); 1062 } 1063 1064 /** 1065 * Special handling for NTFS ADS: Don't accept colon in the fileName. 1066 * 1067 * @param fileName a file name 1068 * @return ADS offsets. 1069 */ 1070 private static int getAdsCriticalOffset(final String fileName) { 1071 // Step 1: Remove leading path segments. 1072 final int offset1 = fileName.lastIndexOf(SYSTEM_SEPARATOR); 1073 final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR); 1074 if (offset1 == -1) { 1075 if (offset2 == -1) { 1076 return 0; 1077 } 1078 return offset2 + 1; 1079 } 1080 if (offset2 == -1) { 1081 return offset1 + 1; 1082 } 1083 return Math.max(offset1, offset2) + 1; 1084 } 1085 1086 /** 1087 * Removes the extension from a fileName. 1088 * <p> 1089 * This method returns the textual part of the fileName before the last dot. 1090 * There must be no directory separator after the dot. 1091 * <pre> 1092 * foo.txt --> foo 1093 * a\b\c.jpg --> a\b\c 1094 * a\b\c --> a\b\c 1095 * a.b\c --> a.b\c 1096 * </pre> 1097 * <p> 1098 * The output will be the same irrespective of the machine that the code is running on. 
1099 * 1100 * @param fileName the fileName to query, null returns null 1101 * @return the fileName minus the extension 1102 */ 1103 public static String removeExtension(final String fileName) { 1104 if (fileName == null) { 1105 return null; 1106 } 1107 requireNonNullChars(fileName); 1108 1109 final int index = indexOfExtension(fileName); 1110 if (index == NOT_FOUND) { 1111 return fileName; 1112 } 1113 return fileName.substring(0, index); 1114 } 1115 1116 /** 1117 * Checks whether two fileNames are equal exactly. 1118 * <p> 1119 * No processing is performed on the fileNames other than comparison, 1120 * thus this is merely a null-safe case-sensitive equals. 1121 * 1122 * @param fileName1 the first fileName to query, may be null 1123 * @param fileName2 the second fileName to query, may be null 1124 * @return true if the fileNames are equal, null equals null 1125 * @see IOCase#SENSITIVE 1126 */ 1127 public static boolean equals(final String fileName1, final String fileName2) { 1128 return equals(fileName1, fileName2, false, IOCase.SENSITIVE); 1129 } 1130 1131 /** 1132 * Checks whether two fileNames are equal using the case rules of the system. 1133 * <p> 1134 * No processing is performed on the fileNames other than comparison. 1135 * The check is case-sensitive on Unix and case-insensitive on Windows. 1136 * 1137 * @param fileName1 the first fileName to query, may be null 1138 * @param fileName2 the second fileName to query, may be null 1139 * @return true if the fileNames are equal, null equals null 1140 * @see IOCase#SYSTEM 1141 */ 1142 public static boolean equalsOnSystem(final String fileName1, final String fileName2) { 1143 return equals(fileName1, fileName2, false, IOCase.SYSTEM); 1144 } 1145 1146 /** 1147 * Checks whether two fileNames are equal after both have been normalized. 1148 * <p> 1149 * Both fileNames are first passed to {@link #normalize(String)}. 1150 * The check is then performed in a case-sensitive manner. 
1151 * 1152 * @param fileName1 the first fileName to query, may be null 1153 * @param fileName2 the second fileName to query, may be null 1154 * @return true if the fileNames are equal, null equals null 1155 * @see IOCase#SENSITIVE 1156 */ 1157 public static boolean equalsNormalized(final String fileName1, final String fileName2) { 1158 return equals(fileName1, fileName2, true, IOCase.SENSITIVE); 1159 } 1160 1161 /** 1162 * Checks whether two fileNames are equal after both have been normalized 1163 * and using the case rules of the system. 1164 * <p> 1165 * Both fileNames are first passed to {@link #normalize(String)}. 1166 * The check is then performed case-sensitive on Unix and 1167 * case-insensitive on Windows. 1168 * 1169 * @param fileName1 the first fileName to query, may be null 1170 * @param fileName2 the second fileName to query, may be null 1171 * @return true if the fileNames are equal, null equals null 1172 * @see IOCase#SYSTEM 1173 */ 1174 public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) { 1175 return equals(fileName1, fileName2, true, IOCase.SYSTEM); 1176 } 1177 1178 /** 1179 * Checks whether two fileNames are equal, optionally normalizing and providing 1180 * control over the case-sensitivity. 
1181 * 1182 * @param fileName1 the first fileName to query, may be null 1183 * @param fileName2 the second fileName to query, may be null 1184 * @param normalized whether to normalize the fileNames 1185 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive 1186 * @return true if the fileNames are equal, null equals null 1187 * @since 1.3 1188 */ 1189 public static boolean equals( 1190 String fileName1, String fileName2, 1191 final boolean normalized, IOCase caseSensitivity) { 1192 1193 if (fileName1 == null || fileName2 == null) { 1194 return fileName1 == null && fileName2 == null; 1195 } 1196 if (normalized) { 1197 fileName1 = normalize(fileName1); 1198 if (fileName1 == null) { 1199 return false; 1200 } 1201 fileName2 = normalize(fileName2); 1202 if (fileName2 == null) { 1203 return false; 1204 } 1205 } 1206 if (caseSensitivity == null) { 1207 caseSensitivity = IOCase.SENSITIVE; 1208 } 1209 return caseSensitivity.checkEquals(fileName1, fileName2); 1210 } 1211 1212 /** 1213 * Checks whether the extension of the fileName is that specified. 1214 * <p> 1215 * This method obtains the extension as the textual part of the fileName 1216 * after the last dot. There must be no directory separator after the dot. 1217 * The extension check is case-sensitive on all platforms. 
1218 * 1219 * @param fileName the fileName to query, null returns false 1220 * @param extension the extension to check for, null or empty checks for no extension 1221 * @return true if the fileName has the specified extension 1222 * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes 1223 */ 1224 public static boolean isExtension(final String fileName, final String extension) { 1225 if (fileName == null) { 1226 return false; 1227 } 1228 requireNonNullChars(fileName); 1229 1230 if (extension == null || extension.isEmpty()) { 1231 return indexOfExtension(fileName) == NOT_FOUND; 1232 } 1233 final String fileExt = getExtension(fileName); 1234 return fileExt.equals(extension); 1235 } 1236 1237 /** 1238 * Checks whether the extension of the fileName is one of those specified. 1239 * <p> 1240 * This method obtains the extension as the textual part of the fileName 1241 * after the last dot. There must be no directory separator after the dot. 1242 * The extension check is case-sensitive on all platforms. 1243 * 1244 * @param fileName the fileName to query, null returns false 1245 * @param extensions the extensions to check for, null checks for no extension 1246 * @return true if the fileName is one of the extensions 1247 * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes 1248 */ 1249 public static boolean isExtension(final String fileName, final String... extensions) { 1250 if (fileName == null) { 1251 return false; 1252 } 1253 requireNonNullChars(fileName); 1254 1255 if (extensions == null || extensions.length == 0) { 1256 return indexOfExtension(fileName) == NOT_FOUND; 1257 } 1258 final String fileExt = getExtension(fileName); 1259 for (final String extension : extensions) { 1260 if (fileExt.equals(extension)) { 1261 return true; 1262 } 1263 } 1264 return false; 1265 } 1266 1267 /** 1268 * Checks whether the extension of the fileName is one of those specified. 
1269 * <p> 1270 * This method obtains the extension as the textual part of the fileName 1271 * after the last dot. There must be no directory separator after the dot. 1272 * The extension check is case-sensitive on all platforms. 1273 * 1274 * @param fileName the fileName to query, null returns false 1275 * @param extensions the extensions to check for, null checks for no extension 1276 * @return true if the fileName is one of the extensions 1277 * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes 1278 */ 1279 public static boolean isExtension(final String fileName, final Collection<String> extensions) { 1280 if (fileName == null) { 1281 return false; 1282 } 1283 requireNonNullChars(fileName); 1284 1285 if (extensions == null || extensions.isEmpty()) { 1286 return indexOfExtension(fileName) == NOT_FOUND; 1287 } 1288 final String fileExt = getExtension(fileName); 1289 for (final String extension : extensions) { 1290 if (fileExt.equals(extension)) { 1291 return true; 1292 } 1293 } 1294 return false; 1295 } 1296 1297 /** 1298 * Checks a fileName to see if it matches the specified wildcard matcher, 1299 * always testing case-sensitive. 1300 * <p> 1301 * The wildcard matcher uses the characters '?' and '*' to represent a 1302 * single or multiple (zero or more) wildcard characters. 1303 * This is the same as often found on Dos/Unix command lines. 1304 * The check is case-sensitive always. 1305 * <pre> 1306 * wildcardMatch("c.txt", "*.txt") --> true 1307 * wildcardMatch("c.txt", "*.jpg") --> false 1308 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1309 * wildcardMatch("c.txt", "*.???") --> true 1310 * wildcardMatch("c.txt", "*.????") --> false 1311 * </pre> 1312 * N.B. the sequence "*?" does not work properly at present in match strings. 
1313 * 1314 * @param fileName the fileName to match on 1315 * @param wildcardMatcher the wildcard string to match against 1316 * @return true if the fileName matches the wildcard string 1317 * @see IOCase#SENSITIVE 1318 */ 1319 public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) { 1320 return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE); 1321 } 1322 1323 /** 1324 * Checks a fileName to see if it matches the specified wildcard matcher 1325 * using the case rules of the system. 1326 * <p> 1327 * The wildcard matcher uses the characters '?' and '*' to represent a 1328 * single or multiple (zero or more) wildcard characters. 1329 * This is the same as often found on Dos/Unix command lines. 1330 * The check is case-sensitive on Unix and case-insensitive on Windows. 1331 * <pre> 1332 * wildcardMatch("c.txt", "*.txt") --> true 1333 * wildcardMatch("c.txt", "*.jpg") --> false 1334 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1335 * wildcardMatch("c.txt", "*.???") --> true 1336 * wildcardMatch("c.txt", "*.????") --> false 1337 * </pre> 1338 * N.B. the sequence "*?" does not work properly at present in match strings. 1339 * 1340 * @param fileName the fileName to match on 1341 * @param wildcardMatcher the wildcard string to match against 1342 * @return true if the fileName matches the wildcard string 1343 * @see IOCase#SYSTEM 1344 */ 1345 public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) { 1346 return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM); 1347 } 1348 1349 /** 1350 * Checks a fileName to see if it matches the specified wildcard matcher 1351 * allowing control over case-sensitivity. 1352 * <p> 1353 * The wildcard matcher uses the characters '?' and '*' to represent a 1354 * single or multiple (zero or more) wildcard characters. 1355 * N.B. the sequence "*?" does not work properly at present in match strings. 
1356 * 1357 * @param fileName the fileName to match on 1358 * @param wildcardMatcher the wildcard string to match against 1359 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive 1360 * @return true if the fileName matches the wildcard string 1361 * @since 1.3 1362 */ 1363 public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase caseSensitivity) { 1364 if (fileName == null && wildcardMatcher == null) { 1365 return true; 1366 } 1367 if (fileName == null || wildcardMatcher == null) { 1368 return false; 1369 } 1370 if (caseSensitivity == null) { 1371 caseSensitivity = IOCase.SENSITIVE; 1372 } 1373 final String[] wcs = splitOnTokens(wildcardMatcher); 1374 boolean anyChars = false; 1375 int textIdx = 0; 1376 int wcsIdx = 0; 1377 final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length); 1378 1379 // loop around a backtrack stack, to handle complex * matching 1380 do { 1381 if (!backtrack.isEmpty()) { 1382 final int[] array = backtrack.pop(); 1383 wcsIdx = array[0]; 1384 textIdx = array[1]; 1385 anyChars = true; 1386 } 1387 1388 // loop whilst tokens and text left to process 1389 while (wcsIdx < wcs.length) { 1390 1391 if (wcs[wcsIdx].equals("?")) { 1392 // ? 
so move to next text char 1393 textIdx++; 1394 if (textIdx > fileName.length()) { 1395 break; 1396 } 1397 anyChars = false; 1398 1399 } else if (wcs[wcsIdx].equals("*")) { 1400 // set any chars status 1401 anyChars = true; 1402 if (wcsIdx == wcs.length - 1) { 1403 textIdx = fileName.length(); 1404 } 1405 1406 } else { 1407 // matching text token 1408 if (anyChars) { 1409 // any chars then try to locate text token 1410 textIdx = caseSensitivity.checkIndexOf(fileName, textIdx, wcs[wcsIdx]); 1411 if (textIdx == NOT_FOUND) { 1412 // token not found 1413 break; 1414 } 1415 final int repeat = caseSensitivity.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]); 1416 if (repeat >= 0) { 1417 backtrack.push(new int[] {wcsIdx, repeat}); 1418 } 1419 } else if (!caseSensitivity.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) { 1420 // matching from current position 1421 // couldn't match token 1422 break; 1423 } 1424 1425 // matched text token, move text index to end of matched token 1426 textIdx += wcs[wcsIdx].length(); 1427 anyChars = false; 1428 } 1429 1430 wcsIdx++; 1431 } 1432 1433 // full match 1434 if (wcsIdx == wcs.length && textIdx == fileName.length()) { 1435 return true; 1436 } 1437 1438 } while (!backtrack.isEmpty()); 1439 1440 return false; 1441 } 1442 1443 /** 1444 * Splits a string into a number of tokens. 1445 * The text is split by '?' and '*'. 1446 * Where multiple '*' occur consecutively they are collapsed into a single '*'. 
1447 * 1448 * @param text the text to split 1449 * @return the array of tokens, never null 1450 */ 1451 static String[] splitOnTokens(final String text) { 1452 // used by wildcardMatch 1453 // package level so a unit test may run on this 1454 1455 if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) { 1456 return new String[] { text }; 1457 } 1458 1459 final char[] array = text.toCharArray(); 1460 final ArrayList<String> list = new ArrayList<>(); 1461 final StringBuilder buffer = new StringBuilder(); 1462 char prevChar = 0; 1463 for (final char ch : array) { 1464 if (ch == '?' || ch == '*') { 1465 if (buffer.length() != 0) { 1466 list.add(buffer.toString()); 1467 buffer.setLength(0); 1468 } 1469 if (ch == '?') { 1470 list.add("?"); 1471 } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*' 1472 list.add("*"); 1473 } 1474 } else { 1475 buffer.append(ch); 1476 } 1477 prevChar = ch; 1478 } 1479 if (buffer.length() != 0) { 1480 list.add(buffer.toString()); 1481 } 1482 1483 return list.toArray(EMPTY_STRING_ARRAY); 1484 } 1485 1486 /** 1487 * Checks whether a given string is a valid host name according to 1488 * RFC 3986. 1489 * 1490 * <p>Accepted are IP addresses (v4 and v6) as well as what the 1491 * RFC calls a "reg-name". Percent encoded names don't seem to be 1492 * valid names in UNC paths.</p> 1493 * 1494 * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" 1495 * @param name the hostname to validate 1496 * @return true if the given name is a valid host name 1497 */ 1498 private static boolean isValidHostName(final String name) { 1499 return isIPv6Address(name) || isRFC3986HostName(name); 1500 } 1501 1502 private static final Pattern IPV4_PATTERN = 1503 Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$"); 1504 private static final int IPV4_MAX_OCTET_VALUE = 255; 1505 1506 /** 1507 * Checks whether a given string represents a valid IPv4 address. 
1508 * 1509 * @param name the name to validate 1510 * @return true if the given name is a valid IPv4 address 1511 */ 1512 // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address 1513 private static boolean isIPv4Address(final String name) { 1514 final Matcher m = IPV4_PATTERN.matcher(name); 1515 if (!m.matches() || m.groupCount() != 4) { 1516 return false; 1517 } 1518 1519 // verify that address subgroups are legal 1520 for (int i = 1; i <= 4; i++) { 1521 final String ipSegment = m.group(i); 1522 final int iIpSegment = Integer.parseInt(ipSegment); 1523 if (iIpSegment > IPV4_MAX_OCTET_VALUE) { 1524 return false; 1525 } 1526 1527 if (ipSegment.length() > 1 && ipSegment.startsWith("0")) { 1528 return false; 1529 } 1530 1531 } 1532 1533 return true; 1534 } 1535 1536 private static final int IPV6_MAX_HEX_GROUPS = 8; 1537 private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4; 1538 private static final int MAX_UNSIGNED_SHORT = 0xffff; 1539 private static final int BASE_16 = 16; 1540 1541 // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address 1542 /** 1543 * Checks whether a given string represents a valid IPv6 address. 
*
     * @param inet6Address the name to validate
     * @return true if the given name is a valid IPv6 address
     */
    private static boolean isIPv6Address(final String inet6Address) {
        final boolean containsCompressedZeroes = inet6Address.contains("::");
        // "::" may appear at most once
        if (containsCompressedZeroes && (inet6Address.indexOf("::") != inet6Address.lastIndexOf("::"))) {
            return false;
        }
        // a single ':' is not allowed at either end
        if ((inet6Address.startsWith(":") && !inet6Address.startsWith("::"))
                || (inet6Address.endsWith(":") && !inet6Address.endsWith("::"))) {
            return false;
        }
        String[] octets = inet6Address.split(":");
        if (containsCompressedZeroes) {
            final List<String> octetList = new ArrayList<>(Arrays.asList(octets));
            if (inet6Address.endsWith("::")) {
                // String.split() drops ending empty segments
                octetList.add("");
            } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) {
                // a leading "::" yields two empty segments, one is enough
                octetList.remove(0);
            }
            octets = octetList.toArray(EMPTY_STRING_ARRAY);
        }
        if (octets.length > IPV6_MAX_HEX_GROUPS) {
            return false;
        }
        int validOctets = 0;
        int emptyOctets = 0; // consecutive empty chunks
        for (int index = 0; index < octets.length; index++) {
            final String octet = octets[index];
            if (octet.isEmpty()) {
                emptyOctets++;
                if (emptyOctets > 1) {
                    return false;
                }
            } else {
                emptyOctets = 0;
                // Is last chunk an IPv4 address?
                if (index == octets.length - 1 && octet.contains(".")) {
                    if (!isIPv4Address(octet)) {
                        return false;
                    }
                    // an embedded IPv4 address takes the place of two groups
                    validOctets += 2;
                    continue;
                }
                if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) {
                    return false;
                }
                // Integer.parseInt tolerates a leading '+' and non-ASCII digits,
                // neither of which may appear in an address, so check the
                // characters before parsing.
                if (!isAsciiHex(octet)) {
                    return false;
                }
                final int octetInt;
                try {
                    octetInt = Integer.parseInt(octet, BASE_16);
                } catch (final NumberFormatException e) {
                    return false;
                }
                if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) {
                    return false;
                }
            }
            validOctets++;
        }
        // all eight groups must be present unless "::" stands in for the missing ones
        return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes);
    }

    /**
     * Checks whether a string consists of ASCII hexadecimal digits only.
     *
     * @param text the text to check
     * @return true if every character is one of 0-9, a-f or A-F
     */
    private static boolean isAsciiHex(final String text) {
        for (int i = 0; i < text.length(); i++) {
            final char ch = text.charAt(i);
            final boolean hexDigit = (ch >= '0' && ch <= '9')
                    || (ch >= 'a' && ch <= 'f')
                    || (ch >= 'A' && ch <= 'F');
            if (!hexDigit) {
                return false;
            }
        }
        return true;
    }

    /** One dot-separated label: starts with a letter or digit, then letters, digits or hyphens. */
    private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");

    /**
     * Checks whether a given string is a valid host name according to
     * RFC 3986 - not accepting IP addresses.
     *
     * @param name the hostname to validate
     * @return true if the given name is a valid host name
     * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
     */
    private static boolean isRFC3986HostName(final String name) {
        // limit -1 keeps trailing empty parts, so a trailing dot stays visible
        final String[] parts = name.split("\\.", -1);
        for (int i = 0; i < parts.length; i++) {
            if (parts[i].isEmpty()) {
                // trailing dot is legal, otherwise we've hit a .. sequence
                return i == parts.length - 1;
            }
            if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) {
                return false;
            }
        }
        return true;
    }
}