001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.tar;
020
021import java.io.ByteArrayOutputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.math.BigInteger;
025import java.nio.ByteBuffer;
026import java.nio.charset.StandardCharsets;
027import java.util.ArrayList;
028import java.util.Collections;
029import java.util.HashMap;
030import java.util.List;
031import java.util.Map;
032
033import org.apache.commons.compress.archivers.zip.ZipEncoding;
034import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
035import org.apache.commons.compress.utils.CharsetNames;
036import org.apache.commons.compress.utils.IOUtils;
037
038import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
039import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;
040import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_NUMBYTES_LEN;
041import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_OFFSET_LEN;
042
043/**
044 * This class provides static utility methods to work with byte streams.
045 *
046 * @Immutable
047 */
048// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
049public class TarUtils {
050
051    private static final int BYTE_MASK = 255;
052
053    static final ZipEncoding DEFAULT_ENCODING =
054        ZipEncodingHelper.getZipEncoding(null);
055
056    /**
057     * Encapsulates the algorithms used up to Commons Compress 1.3 as
058     * ZipEncoding.
059     */
060    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
061            @Override
062            public boolean canEncode(final String name) { return true; }
063
064            @Override
065            public ByteBuffer encode(final String name) {
066                final int length = name.length();
067                final byte[] buf = new byte[length];
068
069                // copy until end of input or output is reached.
070                for (int i = 0; i < length; ++i) {
071                    buf[i] = (byte) name.charAt(i);
072                }
073                return ByteBuffer.wrap(buf);
074            }
075
076            @Override
077            public String decode(final byte[] buffer) {
078                final int length = buffer.length;
079                final StringBuilder result = new StringBuilder(length);
080
081                for (final byte b : buffer) {
082                    if (b == 0) { // Trailing null
083                        break;
084                    }
085                    result.append((char) (b & 0xFF)); // Allow for sign-extension
086                }
087
088                return result.toString();
089            }
090        };
091
092    /** Private constructor to prevent instantiation of this utility class. */
093    private TarUtils(){
094    }
095
096    /**
097     * Parse an octal string from a buffer.
098     *
099     * <p>Leading spaces are ignored.
100     * The buffer must contain a trailing space or NUL,
101     * and may contain an additional trailing space or NUL.</p>
102     *
103     * <p>The input buffer is allowed to contain all NULs,
104     * in which case the method returns 0L
105     * (this allows for missing fields).</p>
106     *
107     * <p>To work-around some tar implementations that insert a
108     * leading NUL this method returns 0 if it detects a leading NUL
109     * since Commons Compress 1.4.</p>
110     *
111     * @param buffer The buffer from which to parse.
112     * @param offset The offset into the buffer from which to parse.
113     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
114     * @return The long value of the octal string.
115     * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected.
116     */
117    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
118        long    result = 0;
119        int     end = offset + length;
120        int     start = offset;
121
122        if (length < 2){
123            throw new IllegalArgumentException("Length "+length+" must be at least 2");
124        }
125
126        if (buffer[start] == 0) {
127            return 0L;
128        }
129
130        // Skip leading spaces
131        while (start < end){
132            if (buffer[start] != ' ') {
133                break;
134            }
135            start++;
136        }
137
138        // Trim all trailing NULs and spaces.
139        // The ustar and POSIX tar specs require a trailing NUL or
140        // space but some implementations use the extra digit for big
141        // sizes/uids/gids ...
142        byte trailer = buffer[end - 1];
143        while (start < end && (trailer == 0 || trailer == ' ')) {
144            end--;
145            trailer = buffer[end - 1];
146        }
147
148        for ( ;start < end; start++) {
149            final byte currentByte = buffer[start];
150            // CheckStyle:MagicNumber OFF
151            if (currentByte < '0' || currentByte > '7'){
152                throw new IllegalArgumentException(
153                        exceptionMessage(buffer, offset, length, start, currentByte));
154            }
155            result = (result << 3) + (currentByte - '0'); // convert from ASCII
156            // CheckStyle:MagicNumber ON
157        }
158
159        return result;
160    }
161
162    /**
163     * Compute the value contained in a byte buffer.  If the most
164     * significant bit of the first byte in the buffer is set, this
165     * bit is ignored and the rest of the buffer is interpreted as a
166     * binary number.  Otherwise, the buffer is interpreted as an
167     * octal number as per the parseOctal function above.
168     *
169     * @param buffer The buffer from which to parse.
170     * @param offset The offset into the buffer from which to parse.
171     * @param length The maximum number of bytes to parse.
172     * @return The long value of the octal or binary string.
173     * @throws IllegalArgumentException if the trailing space/NUL is
174     * missing or an invalid byte is detected in an octal number, or
175     * if a binary number would exceed the size of a signed long
176     * 64-bit integer.
177     * @since 1.4
178     */
179    public static long parseOctalOrBinary(final byte[] buffer, final int offset,
180                                          final int length) {
181
182        if ((buffer[offset] & 0x80) == 0) {
183            return parseOctal(buffer, offset, length);
184        }
185        final boolean negative = buffer[offset] == (byte) 0xff;
186        if (length < 9) {
187            return parseBinaryLong(buffer, offset, length, negative);
188        }
189        return parseBinaryBigInteger(buffer, offset, length, negative);
190    }
191
192    private static long parseBinaryLong(final byte[] buffer, final int offset,
193                                        final int length,
194                                        final boolean negative) {
195        if (length >= 9) {
196            throw new IllegalArgumentException("At offset " + offset + ", "
197                                               + length + " byte binary number"
198                                               + " exceeds maximum signed long"
199                                               + " value");
200        }
201        long val = 0;
202        for (int i = 1; i < length; i++) {
203            val = (val << 8) + (buffer[offset + i] & 0xff);
204        }
205        if (negative) {
206            // 2's complement
207            val--;
208            val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
209        }
210        return negative ? -val : val;
211    }
212
213    private static long parseBinaryBigInteger(final byte[] buffer,
214                                              final int offset,
215                                              final int length,
216                                              final boolean negative) {
217        final byte[] remainder = new byte[length - 1];
218        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
219        BigInteger val = new BigInteger(remainder);
220        if (negative) {
221            // 2's complement
222            val = val.add(BigInteger.valueOf(-1)).not();
223        }
224        if (val.bitLength() > 63) {
225            throw new IllegalArgumentException("At offset " + offset + ", "
226                                               + length + " byte binary number"
227                                               + " exceeds maximum signed long"
228                                               + " value");
229        }
230        return negative ? -val.longValue() : val.longValue();
231    }
232
233    /**
234     * Parse a boolean byte from a buffer.
235     * Leading spaces and NUL are ignored.
236     * The buffer may contain trailing spaces or NULs.
237     *
238     * @param buffer The buffer from which to parse.
239     * @param offset The offset into the buffer from which to parse.
240     * @return The boolean value of the bytes.
241     * @throws IllegalArgumentException if an invalid byte is detected.
242     */
243    public static boolean parseBoolean(final byte[] buffer, final int offset) {
244        return buffer[offset] == 1;
245    }
246
247    // Helper method to generate the exception message
248    private static String exceptionMessage(final byte[] buffer, final int offset,
249            final int length, final int current, final byte currentByte) {
250        // default charset is good enough for an exception message,
251        //
252        // the alternative was to modify parseOctal and
253        // parseOctalOrBinary to receive the ZipEncoding of the
254        // archive (deprecating the existing public methods, of
255        // course) and dealing with the fact that ZipEncoding#decode
256        // can throw an IOException which parseOctal* doesn't declare
257        String string = new String(buffer, offset, length);
258
259        string=string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed
260        return "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length;
261    }
262
263    /**
264     * Parse an entry name from a buffer.
265     * Parsing stops when a NUL is found
266     * or the buffer length is reached.
267     *
268     * @param buffer The buffer from which to parse.
269     * @param offset The offset into the buffer from which to parse.
270     * @param length The maximum number of bytes to parse.
271     * @return The entry name.
272     */
273    public static String parseName(final byte[] buffer, final int offset, final int length) {
274        try {
275            return parseName(buffer, offset, length, DEFAULT_ENCODING);
276        } catch (final IOException ex) { // NOSONAR
277            try {
278                return parseName(buffer, offset, length, FALLBACK_ENCODING);
279            } catch (final IOException ex2) {
280                // impossible
281                throw new RuntimeException(ex2); //NOSONAR
282            }
283        }
284    }
285
286    /**
287     * Parse an entry name from a buffer.
288     * Parsing stops when a NUL is found
289     * or the buffer length is reached.
290     *
291     * @param buffer The buffer from which to parse.
292     * @param offset The offset into the buffer from which to parse.
293     * @param length The maximum number of bytes to parse.
294     * @param encoding name of the encoding to use for file names
295     * @since 1.4
296     * @return The entry name.
297     * @throws IOException on error
298     */
299    public static String parseName(final byte[] buffer, final int offset,
300                                   final int length,
301                                   final ZipEncoding encoding)
302        throws IOException {
303
304        int len = 0;
305        for (int i = offset; len < length && buffer[i] != 0; i++) {
306            len++;
307        }
308        if (len > 0) {
309            final byte[] b = new byte[len];
310            System.arraycopy(buffer, offset, b, 0, len);
311            return encoding.decode(b);
312        }
313        return "";
314    }
315
316    /**
317     * Parses the content of a PAX 1.0 sparse block.
318     * @since 1.20
319     * @param buffer The buffer from which to parse.
320     * @param offset The offset into the buffer from which to parse.
321     * @return a parsed sparse struct
322     */
323    public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) {
324        final long sparseOffset = parseOctalOrBinary(buffer, offset, SPARSE_OFFSET_LEN);
325        final long sparseNumbytes = parseOctalOrBinary(buffer, offset + SPARSE_OFFSET_LEN, SPARSE_NUMBYTES_LEN);
326
327        return new TarArchiveStructSparse(sparseOffset, sparseNumbytes);
328    }
329
330    /**
331     * @since 1.21
332     */
333    static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries)
334        throws IOException {
335        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
336        for (int i = 0; i < entries; i++) {
337            try {
338                final TarArchiveStructSparse sparseHeader =
339                    parseSparse(buffer, offset + i * (SPARSE_OFFSET_LEN + SPARSE_NUMBYTES_LEN));
340
341                if (sparseHeader.getOffset() < 0) {
342                    throw new IOException("Corrupted TAR archive, sparse entry with negative offset");
343                }
344                if (sparseHeader.getNumbytes() < 0) {
345                    throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes");
346                }
347                sparseHeaders.add(sparseHeader);
348            } catch (IllegalArgumentException ex) {
349                // thrown internally by parseOctalOrBinary
350                throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex);
351            }
352        }
353        return Collections.unmodifiableList(sparseHeaders);
354    }
355
356    /**
357     * Copy a name into a buffer.
358     * Copies characters from the name into the buffer
359     * starting at the specified offset.
360     * If the buffer is longer than the name, the buffer
361     * is filled with trailing NULs.
362     * If the name is longer than the buffer,
363     * the output is truncated.
364     *
365     * @param name The header name from which to copy the characters.
366     * @param buf The buffer where the name is to be stored.
367     * @param offset The starting offset into the buffer
368     * @param length The maximum number of header bytes to copy.
369     * @return The updated offset, i.e. offset + length
370     */
371    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
372        try {
373            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
374        } catch (final IOException ex) { // NOSONAR
375            try {
376                return formatNameBytes(name, buf, offset, length,
377                                       FALLBACK_ENCODING);
378            } catch (final IOException ex2) {
379                // impossible
380                throw new RuntimeException(ex2); //NOSONAR
381            }
382        }
383    }
384
385    /**
386     * Copy a name into a buffer.
387     * Copies characters from the name into the buffer
388     * starting at the specified offset.
389     * If the buffer is longer than the name, the buffer
390     * is filled with trailing NULs.
391     * If the name is longer than the buffer,
392     * the output is truncated.
393     *
394     * @param name The header name from which to copy the characters.
395     * @param buf The buffer where the name is to be stored.
396     * @param offset The starting offset into the buffer
397     * @param length The maximum number of header bytes to copy.
398     * @param encoding name of the encoding to use for file names
399     * @since 1.4
400     * @return The updated offset, i.e. offset + length
401     * @throws IOException on error
402     */
403    public static int formatNameBytes(final String name, final byte[] buf, final int offset,
404                                      final int length,
405                                      final ZipEncoding encoding)
406        throws IOException {
407        int len = name.length();
408        ByteBuffer b = encoding.encode(name);
409        while (b.limit() > length && len > 0) {
410            b = encoding.encode(name.substring(0, --len));
411        }
412        final int limit = b.limit() - b.position();
413        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
414
415        // Pad any remaining output bytes with NUL
416        for (int i = limit; i < length; ++i) {
417            buf[offset + i] = 0;
418        }
419
420        return offset + length;
421    }
422
423    /**
424     * Fill buffer with unsigned octal number, padded with leading zeroes.
425     *
426     * @param value number to convert to octal - treated as unsigned
427     * @param buffer destination buffer
428     * @param offset starting offset in buffer
429     * @param length length of buffer to fill
430     * @throws IllegalArgumentException if the value will not fit in the buffer
431     */
432    public static void formatUnsignedOctalString(final long value, final byte[] buffer,
433            final int offset, final int length) {
434        int remaining = length;
435        remaining--;
436        if (value == 0) {
437            buffer[offset + remaining--] = (byte) '0';
438        } else {
439            long val = value;
440            for (; remaining >= 0 && val != 0; --remaining) {
441                // CheckStyle:MagicNumber OFF
442                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
443                val = val >>> 3;
444                // CheckStyle:MagicNumber ON
445            }
446            if (val != 0){
447                throw new IllegalArgumentException
448                (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
449            }
450        }
451
452        for (; remaining >= 0; --remaining) { // leading zeros
453            buffer[offset + remaining] = (byte) '0';
454        }
455    }
456
457    /**
458     * Write an octal integer into a buffer.
459     *
460     * Uses {@link #formatUnsignedOctalString} to format
461     * the value as an octal string with leading zeros.
462     * The converted number is followed by space and NUL
463     *
464     * @param value The value to write
465     * @param buf The buffer to receive the output
466     * @param offset The starting offset into the buffer
467     * @param length The size of the output buffer
468     * @return The updated offset, i.e offset+length
469     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
470     */
471    public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
472
473        int idx=length-2; // For space and trailing null
474        formatUnsignedOctalString(value, buf, offset, idx);
475
476        buf[offset + idx++] = (byte) ' '; // Trailing space
477        buf[offset + idx]   = 0; // Trailing null
478
479        return offset + length;
480    }
481
482    /**
483     * Write an octal long integer into a buffer.
484     *
485     * Uses {@link #formatUnsignedOctalString} to format
486     * the value as an octal string with leading zeros.
487     * The converted number is followed by a space.
488     *
489     * @param value The value to write as octal
490     * @param buf The destinationbuffer.
491     * @param offset The starting offset into the buffer.
492     * @param length The length of the buffer
493     * @return The updated offset
494     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
495     */
496    public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
497
498        final int idx=length-1; // For space
499
500        formatUnsignedOctalString(value, buf, offset, idx);
501        buf[offset + idx] = (byte) ' '; // Trailing space
502
503        return offset + length;
504    }
505
506    /**
507     * Write an long integer into a buffer as an octal string if this
508     * will fit, or as a binary number otherwise.
509     *
510     * Uses {@link #formatUnsignedOctalString} to format
511     * the value as an octal string with leading zeros.
512     * The converted number is followed by a space.
513     *
514     * @param value The value to write into the buffer.
515     * @param buf The destination buffer.
516     * @param offset The starting offset into the buffer.
517     * @param length The length of the buffer.
518     * @return The updated offset.
519     * @throws IllegalArgumentException if the value (and trailer)
520     * will not fit in the buffer.
521     * @since 1.4
522     */
523    public static int formatLongOctalOrBinaryBytes(
524        final long value, final byte[] buf, final int offset, final int length) {
525
526        // Check whether we are dealing with UID/GID or SIZE field
527        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
528
529        final boolean negative = value < 0;
530        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
531            return formatLongOctalBytes(value, buf, offset, length);
532        }
533
534        if (length < 9) {
535            formatLongBinary(value, buf, offset, length, negative);
536        } else {
537            formatBigIntegerBinary(value, buf, offset, length, negative);
538        }
539
540        buf[offset] = (byte) (negative ? 0xff : 0x80);
541        return offset + length;
542    }
543
544    private static void formatLongBinary(final long value, final byte[] buf,
545                                         final int offset, final int length,
546                                         final boolean negative) {
547        final int bits = (length - 1) * 8;
548        final long max = 1L << bits;
549        long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
550        if (val < 0 || val >= max) {
551            throw new IllegalArgumentException("Value " + value +
552                " is too large for " + length + " byte field.");
553        }
554        if (negative) {
555            val ^= max - 1;
556            val++;
557            val |= 0xffL << bits;
558        }
559        for (int i = offset + length - 1; i >= offset; i--) {
560            buf[i] = (byte) val;
561            val >>= 8;
562        }
563    }
564
565    private static void formatBigIntegerBinary(final long value, final byte[] buf,
566                                               final int offset,
567                                               final int length,
568                                               final boolean negative) {
569        final BigInteger val = BigInteger.valueOf(value);
570        final byte[] b = val.toByteArray();
571        final int len = b.length;
572        if (len > length - 1) {
573            throw new IllegalArgumentException("Value " + value +
574                " is too large for " + length + " byte field.");
575        }
576        final int off = offset + length - len;
577        System.arraycopy(b, 0, buf, off, len);
578        final byte fill = (byte) (negative ? 0xff : 0);
579        for (int i = offset + 1; i < off; i++) {
580            buf[i] = fill;
581        }
582    }
583
584    /**
585     * Writes an octal value into a buffer.
586     *
587     * Uses {@link #formatUnsignedOctalString} to format
588     * the value as an octal string with leading zeros.
589     * The converted number is followed by NUL and then space.
590     *
591     * @param value The value to convert
592     * @param buf The destination buffer
593     * @param offset The starting offset into the buffer.
594     * @param length The size of the buffer.
595     * @return The updated value of offset, i.e. offset+length
596     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
597     */
598    public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
599
600        int idx=length-2; // for NUL and space
601        formatUnsignedOctalString(value, buf, offset, idx);
602
603        buf[offset + idx++]   = 0; // Trailing null
604        buf[offset + idx]     = (byte) ' '; // Trailing space
605
606        return offset + length;
607    }
608
609    /**
610     * Compute the checksum of a tar entry header.
611     *
612     * @param buf The tar entry's header buffer.
613     * @return The computed checksum.
614     */
615    public static long computeCheckSum(final byte[] buf) {
616        long sum = 0;
617
618        for (final byte element : buf) {
619            sum += BYTE_MASK & element;
620        }
621
622        return sum;
623    }
624
625    /**
626     * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>:
627     * <blockquote>
628     * The checksum is calculated by taking the sum of the unsigned byte values
629     * of the header block with the eight checksum bytes taken to be ascii
630     * spaces (decimal value 32). It is stored as a six digit octal number with
631     * leading zeroes followed by a NUL and then a space. Various
632     * implementations do not adhere to this format. For better compatibility,
633     * ignore leading and trailing whitespace, and get the first six digits. In
634     * addition, some historic tar implementations treated bytes as signed.
635     * Implementations typically calculate the checksum both ways, and treat it
636     * as good if either the signed or unsigned sum matches the included
637     * checksum.
638     * </blockquote>
639     * <p>
640     * The return value of this method should be treated as a best-effort
641     * heuristic rather than an absolute and final truth. The checksum
642     * verification logic may well evolve over time as more special cases
643     * are encountered.
644     *
645     * @param header tar header
646     * @return whether the checksum is reasonably good
647     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
648     * @since 1.5
649     */
650    public static boolean verifyCheckSum(final byte[] header) {
651        final long storedSum = parseOctal(header, CHKSUM_OFFSET, CHKSUMLEN);
652        long unsignedSum = 0;
653        long signedSum = 0;
654
655        for (int i = 0; i < header.length; i++) {
656            byte b = header[i];
657            if (CHKSUM_OFFSET  <= i && i < CHKSUM_OFFSET + CHKSUMLEN) {
658                b = ' ';
659            }
660            unsignedSum += 0xff & b;
661            signedSum += b;
662        }
663        return storedSum == unsignedSum || storedSum == signedSum;
664    }
665
666    /**
667     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
668     * may appear multi times, and they look like:
669     *
670     * GNU.sparse.size=size
671     * GNU.sparse.numblocks=numblocks
672     * repeat numblocks times
673     *   GNU.sparse.offset=offset
674     *   GNU.sparse.numbytes=numbytes
675     * end repeat
676     *
677     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
678     *
679     * GNU.sparse.map
680     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
681     *
682     * @param inputStream input stream to read keys and values
683     * @param sparseHeaders used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times,
684     *                      the sparse headers need to be stored in an array, not a map
685     * @param globalPaxHeaders global PAX headers of the tar archive
686     * @return map of PAX headers values found inside of the current (local or global) PAX headers tar entry.
687     * @throws IOException if an I/O error occurs.
688     * @deprecated use the four-arg version instead
689     */
690    @Deprecated
691    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders)
692            throws IOException {
693        return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1);
694    }
695
696    /**
697     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
698     * may appear multi times, and they look like:
699     *
700     * GNU.sparse.size=size
701     * GNU.sparse.numblocks=numblocks
702     * repeat numblocks times
703     *   GNU.sparse.offset=offset
704     *   GNU.sparse.numbytes=numbytes
705     * end repeat
706     *
707     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
708     *
709     * GNU.sparse.map
710     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
711     *
712     * @param inputStream input stream to read keys and values
713     * @param sparseHeaders used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times,
714     *                      the sparse headers need to be stored in an array, not a map
715     * @param globalPaxHeaders global PAX headers of the tar archive
716     * @param headerSize total size of the PAX header, will be ignored if negative
717     * @return map of PAX headers values found inside of the current (local or global) PAX headers tar entry.
718     * @throws IOException if an I/O error occurs.
719     * @since 1.21
720     */
721    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream,
722            final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders,
723            final long headerSize) throws IOException {
724        final Map<String, String> headers = new HashMap<>(globalPaxHeaders);
725        Long offset = null;
726        // Format is "length keyword=value\n";
727        int totalRead = 0;
728        while(true) { // get length
729            int ch;
730            int len = 0;
731            int read = 0;
732            while((ch = inputStream.read()) != -1) {
733                read++;
734                totalRead++;
735                if (ch == '\n') { // blank line in header
736                    break;
737                }
738                if (ch == ' '){ // End of length string
739                    // Get keyword
740                    final ByteArrayOutputStream coll = new ByteArrayOutputStream();
741                    while((ch = inputStream.read()) != -1) {
742                        read++;
743                        totalRead++;
744                        if (totalRead < 0 || (headerSize >= 0 && totalRead >= headerSize)) {
745                            break;
746                        }
747                        if (ch == '='){ // end of keyword
748                            final String keyword = coll.toString(CharsetNames.UTF_8);
749                            // Get rest of entry
750                            final int restLen = len - read;
751                            if (restLen <= 1) { // only NL
752                                headers.remove(keyword);
753                            } else if (headerSize >= 0 && restLen > headerSize - totalRead) {
754                                throw new IOException("Paxheader value size " + restLen
755                                    + " exceeds size of header record");
756                            } else {
757                                final byte[] rest = IOUtils.readRange(inputStream, restLen);
758                                final int got = rest.length;
759                                if (got != restLen) {
760                                    throw new IOException("Failed to read "
761                                            + "Paxheader. Expected "
762                                            + restLen
763                                            + " bytes, read "
764                                            + got);
765                                }
766                                totalRead += restLen;
767                                // Drop trailing NL
768                                if (rest[restLen - 1] != '\n') {
769                                    throw new IOException("Failed to read Paxheader."
770                                       + "Value should end with a newline");
771                                }
772                                final String value = new String(rest, 0,
773                                        restLen - 1, StandardCharsets.UTF_8);
774                                headers.put(keyword, value);
775
776                                // for 0.0 PAX Headers
777                                if (keyword.equals("GNU.sparse.offset")) {
778                                    if (offset != null) {
779                                        // previous GNU.sparse.offset header but but no numBytes
780                                        sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
781                                    }
782                                    try {
783                                        offset = Long.valueOf(value);
784                                    } catch (NumberFormatException ex) {
785                                        throw new IOException("Failed to read Paxheader."
786                                            + "GNU.sparse.offset contains a non-numeric value");
787                                    }
788                                    if (offset < 0) {
789                                        throw new IOException("Failed to read Paxheader."
790                                            + "GNU.sparse.offset contains negative value");
791                                    }
792                                }
793
794                                // for 0.0 PAX Headers
795                                if (keyword.equals("GNU.sparse.numbytes")) {
796                                    if (offset == null) {
797                                        throw new IOException("Failed to read Paxheader." +
798                                                "GNU.sparse.offset is expected before GNU.sparse.numbytes shows up.");
799                                    }
800                                    long numbytes;
801                                    try {
802                                        numbytes = Long.parseLong(value);
803                                    } catch (NumberFormatException ex) {
804                                        throw new IOException("Failed to read Paxheader."
805                                            + "GNU.sparse.numbytes contains a non-numeric value.");
806                                    }
807                                    if (numbytes < 0) {
808                                        throw new IOException("Failed to read Paxheader."
809                                            + "GNU.sparse.numbytes contains negative value");
810                                    }
811                                    sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes));
812                                    offset = null;
813                                }
814                            }
815                            break;
816                        }
817                        coll.write((byte) ch);
818                    }
819                    break; // Processed single header
820                }
821
822                // COMPRESS-530 : throw if we encounter a non-number while reading length
823                if (ch < '0' || ch > '9') {
824                    throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length");
825                }
826
827                len *= 10;
828                len += ch - '0';
829            }
830            if (ch == -1){ // EOF
831                break;
832            }
833        }
834        if (offset != null) {
835            // offset but no numBytes
836            sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
837        }
838        return headers;
839    }
840
841    /**
842     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
843     * GNU.sparse.map
844     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
845     *
846     * <p>Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You
847     * should use {@link #parseFromPAX01SparseHeaders} directly instead.
848     *
849     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
850     * @return sparse headers parsed from sparse map
851     * @deprecated use #parseFromPAX01SparseHeaders instead
852     */
853    protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(String sparseMap) {
854        try {
855            return parseFromPAX01SparseHeaders(sparseMap);
856        } catch (IOException ex) {
857            throw new RuntimeException(ex.getMessage(), ex);
858        }
859    }
860
861    /**
862     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
863     * GNU.sparse.map
864     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
865     *
866     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
867     * @return unmodifiable list of sparse headers parsed from sparse map
868     * @throws IOException Corrupted TAR archive.
869     * @since 1.21
870     */
871    protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(String sparseMap)
872        throws IOException {
873        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
874        String[] sparseHeaderStrings = sparseMap.split(",");
875        if (sparseHeaderStrings.length % 2 == 1) {
876            throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header");
877        }
878
879        for (int i = 0; i < sparseHeaderStrings.length; i += 2) {
880            long sparseOffset;
881            try {
882                sparseOffset = Long.parseLong(sparseHeaderStrings[i]);
883            } catch (NumberFormatException ex) {
884                throw new IOException("Corrupted TAR archive."
885                    + " Sparse struct offset contains a non-numeric value");
886            }
887            if (sparseOffset < 0) {
888                throw new IOException("Corrupted TAR archive."
889                    + " Sparse struct offset contains negative value");
890            }
891            long sparseNumbytes;
892            try {
893                sparseNumbytes = Long.parseLong(sparseHeaderStrings[i + 1]);
894            } catch (NumberFormatException ex) {
895                throw new IOException("Corrupted TAR archive."
896                    + " Sparse struct numbytes contains a non-numeric value");
897            }
898            if (sparseNumbytes < 0) {
899                throw new IOException("Corrupted TAR archive."
900                    + " Sparse struct numbytes contains negative value");
901            }
902            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
903        }
904
905        return Collections.unmodifiableList(sparseHeaders);
906    }
907
908    /**
909     * For PAX Format 1.X:
910     * The sparse map itself is stored in the file data block, preceding the actual file data.
911     * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary.
912     * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers
913     * giving the offset and size of the data block it describes.
914     * @param inputStream parsing source.
915     * @param recordSize The size the TAR header 
916     * @return sparse headers
917     * @throws IOException if an I/O error occurs.
918     */
919    protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException {
920        // for 1.X PAX Headers
921        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
922        long bytesRead = 0;
923
924        long[] readResult = readLineOfNumberForPax1X(inputStream);
925        long sparseHeadersCount = readResult[0];
926        if (sparseHeadersCount < 0) {
927            // overflow while reading number?
928            throw new IOException("Corrupted TAR archive. Negative value in sparse headers block");
929        }
930        bytesRead += readResult[1];
931        while (sparseHeadersCount-- > 0) {
932            readResult = readLineOfNumberForPax1X(inputStream);
933            final long sparseOffset = readResult[0];
934            if (sparseOffset < 0) {
935                throw new IOException("Corrupted TAR archive."
936                    + " Sparse header block offset contains negative value");
937            }
938            bytesRead += readResult[1];
939
940            readResult = readLineOfNumberForPax1X(inputStream);
941            final long sparseNumbytes = readResult[0];
942            if (sparseNumbytes < 0) {
943                throw new IOException("Corrupted TAR archive."
944                    + " Sparse header block numbytes contains negative value");
945            }
946            bytesRead += readResult[1];
947            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
948        }
949
950        // skip the rest of this record data
951        long bytesToSkip = recordSize - bytesRead % recordSize;
952        IOUtils.skip(inputStream, bytesToSkip);
953        return sparseHeaders;
954    }
955
956    /**
957     * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data.
958     * It consists of a series of decimal numbers delimited by newlines.
959     *
960     * @param inputStream the input stream of the tar file
961     * @return the decimal number delimited by '\n', and the bytes read from input stream
962     * @throws IOException
963     */
964    private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException {
965        int number;
966        long result = 0;
967        long bytesRead = 0;
968
969        while ((number = inputStream.read()) != '\n') {
970            bytesRead += 1;
971            if (number == -1) {
972                throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format");
973            }
974            if (number < '0' || number > '9') {
975                throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block");
976            }
977            result = result * 10 + (number - '0');
978        }
979        bytesRead += 1;
980
981        return new long[]{result, bytesRead};
982    }
983
984}