UnixCompressFilter.java

  1. /* Copyright 2002-2025 CS GROUP
  2.  * Licensed to CS GROUP (CS) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * CS licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *   http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.orekit.data;

  18. import java.io.IOException;
  19. import java.io.InputStream;
  20. import java.util.Arrays;

  21. import org.hipparchus.util.FastMath;
  22. import org.orekit.errors.OrekitException;
  23. import org.orekit.errors.OrekitIOException;
  24. import org.orekit.errors.OrekitMessages;

  25. /** Filter for Unix compressed data.
  26.  * @author Luc Maisonobe
  27.  * @since 9.2
  28.  */
  29. public class UnixCompressFilter implements DataFilter {

  30.     /** Suffix for Unix compressed files. */
  31.     private static final String SUFFIX = ".Z";

  32.     /** Empty constructor.
  33.      * <p>
  34.      * This constructor is not strictly necessary, but it prevents spurious
  35.      * javadoc warnings with JDK 18 and later.
  36.      * </p>
  37.      * @since 12.0
  38.      */
  39.     public UnixCompressFilter() {
  40.         // nothing to do
  41.     }

  42.     /** {@inheritDoc} */
  43.     @Override
  44.     public DataSource filter(final DataSource original) {
  45.         final String            oName   = original.getName();
  46.         final DataSource.Opener oOpener = original.getOpener();
  47.         if (oName.endsWith(SUFFIX)) {
  48.             final String                  fName   = oName.substring(0, oName.length() - SUFFIX.length());
  49.             final DataSource.StreamOpener fOpener = () -> new ZInputStream(oName, new Buffer(oOpener.openStreamOnce()));
  50.             return new DataSource(fName, fOpener);
  51.         } else {
  52.             return original;
  53.         }
  54.     }

  55.     /** Filtering of Unix compressed stream. */
  56.     private static class ZInputStream extends InputStream {

  57.         /** First magic header byte. */
  58.         private static final int MAGIC_HEADER_1 = 0x1f;

  59.         /** Second magic header byte. */
  60.         private static final int MAGIC_HEADER_2 = 0x9d;

  61.         /** Byte bits width. */
  62.         private static final int BYTE_WIDTH = 8;

  63.         /** Initial bits width. */
  64.         private static final int INIT_WIDTH = 9;

  65.         /** Reset table code. */
  66.         private static final int RESET_TABLE = 256;

  67.         /** First non-predefined entry. */
  68.         private static final int FIRST = 257;

  69.         /** File name. */
  70.         private final String name;

  71.         /** Indicator for end of input. */
  72.         private boolean endOfInput;

  73.         /** Common sequences table. */
  74.         private final UncompressedSequence[] table;

  75.         /** Next available entry in the table. */
  76.         private int available;

  77.         /** Flag for block mode when table is full. */
  78.         private final boolean blockMode;

  79.         /** Maximum width allowed. */
  80.         private final int maxWidth;

  81.         /** Current input width in bits. */
  82.         private int currentWidth;

  83.         /** Maximum key that can be encoded with current width. */
  84.         private int currentMaxKey;

  85.         /** Number of bits read since last reset. */
  86.         private int bitsRead;

  87.         /** Lookahead byte, already read but not yet used. */
  88.         private int lookAhead;

  89.         /** Number of bits in the lookahead byte. */
  90.         private int lookAheadWidth;

  91.         /** Input buffer. */
  92.         private Buffer input;

  93.         /** Previous uncompressed sequence output. */
  94.         private UncompressedSequence previousSequence;

  95.         /** Uncompressed sequence being output. */
  96.         private UncompressedSequence currentSequence;

  97.         /** Number of bytes of the current sequence already output. */
  98.         private int alreadyOutput;

  99.         /** Simple constructor.
  100.          * @param name file name
  101.          * @param input underlying compressed stream
  102.          * @exception IOException if first bytes cannot be read
  103.          */
  104.         ZInputStream(final String name, final Buffer input)
  105.             throws IOException {

  106.             this.name       = name;
  107.             this.input      = input;
  108.             this.endOfInput = false;

  109.             // check header
  110.             if (input.getByte() != MAGIC_HEADER_1 || input.getByte() != MAGIC_HEADER_2) {
  111.                 throw new OrekitException(OrekitMessages.NOT_A_SUPPORTED_UNIX_COMPRESSED_FILE, name);
  112.             }

  113.             final int header3 = input.getByte();
  114.             this.blockMode = (header3 & 0x80) != 0;
  115.             this.maxWidth  = header3 & 0x1f;

  116.             // set up table, with at least all entries for one byte
  117.             this.table = new UncompressedSequence[1 << FastMath.max(INIT_WIDTH, maxWidth)];
  118.             for (int i = 0; i < FIRST; ++i) {
  119.                 table[i] = new UncompressedSequence(null, (byte) i);
  120.             }

  121.             // initialize decompression state
  122.             initialize();

  123.         }

  124.         /** Initialize compression state.
  125.          */
  126.         private void initialize() {
  127.             this.available        = FIRST;
  128.             this.bitsRead         = 0;
  129.             this.lookAhead        = 0;
  130.             this.lookAheadWidth   = 0;
  131.             this.currentWidth     = INIT_WIDTH;
  132.             this.currentMaxKey    = (1 << currentWidth) - 1;
  133.             this.previousSequence = null;
  134.             this.currentSequence  = null;
  135.             this.alreadyOutput    = 0;
  136.         }

  137.         /** Read next input key.
  138.          * @return next input key or -1 if end of stream is reached
  139.          * @exception IOException if a read error occurs
  140.          */
  141.         private int nextKey() throws IOException {

  142.             int keyMask = (1 << currentWidth) - 1;

  143.             while (true) {
  144.                 // initialize key with the last bits remaining from previous read
  145.                 int key = lookAhead & keyMask;

  146.                 // read more bits until key is complete
  147.                 for (int remaining = currentWidth - lookAheadWidth; remaining > 0; remaining -= BYTE_WIDTH) {
  148.                     lookAhead       = input.getByte();
  149.                     lookAheadWidth += BYTE_WIDTH;
  150.                     if (lookAhead < 0) {
  151.                         if (key == 0 || key == keyMask) {
  152.                             // the key is either a set of padding 0 bits
  153.                             // or a full key containing -1 if read() is called several times after EOF
  154.                             return -1;
  155.                         } else {
  156.                             // end of stream encountered in the middle of a read
  157.                             throw new OrekitIOException(OrekitMessages.UNEXPECTED_END_OF_FILE, name);
  158.                         }
  159.                     }
  160.                     key = (key | lookAhead << (currentWidth - remaining)) & keyMask;
  161.                 }

  162.                 // store the extra bits already read in the lookahead byte for next call
  163.                 lookAheadWidth -= currentWidth;
  164.                 lookAhead       = lookAhead >>> (BYTE_WIDTH - lookAheadWidth);

  165.                 bitsRead += currentWidth;

  166.                 if (blockMode && key == RESET_TABLE) {

  167.                     // skip the padding bits inserted when compressor flushed its buffer
  168.                     final int superSize = currentWidth * 8;
  169.                     int padding = (superSize - 1 - (bitsRead + superSize - 1) % superSize) / 8;
  170.                     while (padding-- > 0) {
  171.                         input.getByte();
  172.                     }

  173.                     // reset the table to handle a new block and read again next key
  174.                     Arrays.fill(table, FIRST, table.length, null);
  175.                     initialize();

  176.                     // reset the lookahead mask as the current width has changed
  177.                     keyMask = (1 << currentWidth) - 1;

  178.                 } else {
  179.                     // return key at current width
  180.                     return key;
  181.                 }

  182.             }

  183.         }

  184.         /** Select next uncompressed sequence to output.
  185.          * @return true if there is a next sequence
  186.          * @exception IOException if a read error occurs
  187.          */
  188.         private boolean selectNext() throws IOException {

  189.             // read next input key
  190.             final int key = nextKey();
  191.             if (key < 0) {
  192.                 // end of stream reached
  193.                 return false;
  194.             }

  195.             if (previousSequence != null && available < table.length) {
  196.                 // update the table with the next uncompressed byte appended to previous sequence
  197.                 final byte nextByte;
  198.                 if (key == available) {
  199.                     nextByte = previousSequence.getByte(0);
  200.                 } else if (table[key] != null) {
  201.                     nextByte = table[key].getByte(0);
  202.                 } else {
  203.                     throw new OrekitIOException(OrekitMessages.CORRUPTED_FILE, name);
  204.                 }
  205.                 table[available++] = new UncompressedSequence(previousSequence, nextByte);
  206.                 if (available > currentMaxKey && currentWidth < maxWidth) {
  207.                     // we need to increase the key size
  208.                     currentMaxKey = (1 << ++currentWidth) - 1;
  209.                 }
  210.             }

  211.             currentSequence = table[key];
  212.             if (currentSequence == null) {
  213.                 // the compressed file references a non-existent table entry
  214.                 // (this is not the well-known case of entry being used just before
  215.                 //  being defined, which is already handled above), the file is corrupted
  216.                 throw new OrekitIOException(OrekitMessages.CORRUPTED_FILE, name);
  217.             }
  218.             alreadyOutput   = 0;

  219.             return true;

  220.         }

  221.         /** {@inheritDoc} */
  222.         @Override
  223.         public int read() throws IOException {
  224.             final byte[] b = new byte[1];
  225.             return read(b, 0, 1) < 0 ? -1 : b[0];
  226.         }

  227.         /** {@inheritDoc} */
  228.         @Override
  229.         public int read(final byte[] b, final int offset, final int len) throws IOException {

  230.             if (currentSequence == null) {
  231.                 if (endOfInput || !selectNext()) {
  232.                     // we have reached end of data
  233.                     endOfInput = true;
  234.                     return -1;
  235.                 }
  236.             }

  237.             // copy as many bytes as possible from current sequence
  238.             final int n = FastMath.min(len, currentSequence.length() - alreadyOutput);
  239.             for (int i = 0; i < n; ++i) {
  240.                 b[offset + i] = currentSequence.getByte(alreadyOutput++);
  241.             }
  242.             if (alreadyOutput >= currentSequence.length()) {
  243.                 // we have just exhausted the current sequence
  244.                 previousSequence = currentSequence;
  245.                 currentSequence  = null;
  246.                 alreadyOutput    = 0;
  247.             }

  248.             return n;

  249.         }

  250.         /** {@inheritDoc} */
  251.         @Override
  252.         public int available() {
  253.             return currentSequence == null ? 0 : currentSequence.length() - alreadyOutput;
  254.         }

  255.     }

  256.     /** Uncompressed bits sequence. */
  257.     private static class UncompressedSequence {

  258.         /** Prefix sequence (null if this is a start sequence). */
  259.         private final UncompressedSequence prefix;

  260.         /** Last byte in the sequence. */
  261.         private final byte last;

  262.         /** Index of the last byte in the sequence (i.e. length - 1). */
  263.         private final int index;

  264.         /** Simple constructor.
  265.          * @param prefix prefix of the sequence (null if this is a start sequence)
  266.          * @param last last byte of the sequence
  267.          */
  268.         UncompressedSequence(final UncompressedSequence prefix, final byte last) {
  269.             this.prefix = prefix;
  270.             this.last   = last;
  271.             this.index  = prefix == null ? 0 : prefix.index + 1;
  272.         }

  273.         /** Get the length of the sequence.
  274.          * @return length of the sequence
  275.          */
  276.         public int length() {
  277.             return index + 1;
  278.         }

  279.         /** Get a byte from the sequence.
  280.          * @param outputIndex index of the byte in the sequence, counting from 0
  281.          * @return byte at {@code outputIndex}
  282.          */
  283.         public byte getByte(final int outputIndex) {
  284.             return index == outputIndex ? last : prefix.getByte(outputIndex);
  285.         }

  286.     }

  287.     /** Buffer for reading input data. */
  288.     private static class Buffer {

  289.         /** Size of input/output buffers. */
  290.         private static final int BUFFER_SIZE = 4096;

  291.         /** Underlying compressed stream. */
  292.         private final InputStream input;

  293.         /** Buffer data. */
  294.         private final byte[] data;

  295.         /** Start of pending data. */
  296.         private int start;

  297.         /** End of pending data. */
  298.         private int end;

  299.         /** Simple constructor.
  300.          * @param input input stream
  301.          */
  302.         Buffer(final InputStream input) {
  303.             this.input = input;
  304.             this.data  = new byte[BUFFER_SIZE];
  305.             this.start = 0;
  306.             this.end   = start;
  307.         }

  308.         /** Get one input byte.
  309.          * @return input byte, or -1 if end of input has been reached
  310.          * @throws IOException if input data cannot be read
  311.          */
  312.         private int getByte() throws IOException {

  313.             if (start == end) {
  314.                 // the buffer is empty
  315.                 start = 0;
  316.                 end   = input.read(data);
  317.                 if (end == -1) {
  318.                     return -1;
  319.                 }
  320.             }

  321.             return data[start++] & 0xFF;

  322.         }

  323.     }
  324. }