NetworkCrawler.java

/* Copyright 2002-2018 CS Systèmes d'Information
 * Licensed to CS Systèmes d'Information (CS) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * CS licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
  17. package org.orekit.data;

  18. import java.io.File;
  19. import java.io.IOException;
  20. import java.io.InputStream;
  21. import java.net.URISyntaxException;
  22. import java.net.URL;
  23. import java.net.URLConnection;
  24. import java.text.ParseException;
  25. import java.util.ArrayList;
  26. import java.util.List;
  27. import java.util.regex.Pattern;

  28. import org.hipparchus.exception.DummyLocalizable;
  29. import org.orekit.errors.OrekitException;


  30. /** Provider for data files directly fetched from network.

  31.  * <p>
  32.  * This class handles a list of URLs pointing to data files or zip/jar on
  33.  * the net. Since the net is not a tree structure the list elements
  34.  * cannot be top elements recursively browsed as in {@link
  35.  * DirectoryCrawler}, they must be data files or zip/jar archives.
  36.  * </p>
  37.  * <p>
  38.  * The files fetched from network can be locally cached on disk. This prevents
  39.  * too frequent network access if the URLs are remote ones (for example
  40.  * original internet URLs).
  41.  * </p>
  42.  * <p>
  43.  * If the URL points to a remote server (typically on the web) on the other side
  44.  * of a proxy server, you need to configure the networking layer of your
  45.  * application to use the proxy. For a typical authenticating proxy as used in
  46.  * many corporate environments, this can be done as follows using for example
  47.  * the AuthenticatorDialog graphical authenticator class that can be found
  48.  * in the tests directories:
  49.  * <pre>
  50.  *   System.setProperty("http.proxyHost",     "proxy.your.domain.com");
  51.  *   System.setProperty("http.proxyPort",     "8080");
  52.  *   System.setProperty("http.nonProxyHosts", "localhost|*.your.domain.com");
  53.  *   Authenticator.setDefault(new AuthenticatorDialog());
  54.  * </pre>
  55.  *
  56.  * <p>
  57.  * Gzip-compressed files are supported.
  58.  * </p>
  59.  * <p>
  60.  * Zip archives entries are supported recursively.
  61.  * </p>
  62.  * <p>
  63.  * This is a simple application of the <code>visitor</code> design pattern for
  64.  * list browsing.
  65.  * </p>
  66.  * @see DataProvidersManager
  67.  * @author Luc Maisonobe
  68.  */
  69. public class NetworkCrawler implements DataProvider {

  70.     /** URLs list. */
  71.     private final List<URL> urls;

  72.     /** Connection timeout (milliseconds). */
  73.     private int timeout;

  74.     /** Build a data classpath crawler.
  75.      * <p>The default timeout is set to 10 seconds.</p>
  76.      * @param urls list of data file URLs
  77.      */
  78.     public NetworkCrawler(final URL... urls) {

  79.         this.urls = new ArrayList<URL>();
  80.         for (final URL url : urls) {
  81.             this.urls.add(url);
  82.         }

  83.         timeout = 10000;

  84.     }

  85.     /** Set the timeout for connection.
  86.      * @param timeout connection timeout in milliseconds
  87.      */
  88.     public void setTimeout(final int timeout) {
  89.         this.timeout = timeout;
  90.     }

  91.     /** {@inheritDoc} */
  92.     public boolean feed(final Pattern supported, final DataLoader visitor)
  93.         throws OrekitException {

  94.         try {
  95.             OrekitException delayedException = null;
  96.             boolean loaded = false;
  97.             for (URL url : urls) {
  98.                 try {

  99.                     if (visitor.stillAcceptsData()) {
  100.                         final String name     = url.toURI().toString();
  101.                         final String fileName = new File(url.getPath()).getName();
  102.                         if (ZIP_ARCHIVE_PATTERN.matcher(fileName).matches()) {

  103.                             // browse inside the zip/jar file
  104.                             new ZipJarCrawler(url).feed(supported, visitor);
  105.                             loaded = true;

  106.                         } else {

  107.                             // apply all registered filters
  108.                             NamedData data = new NamedData(fileName, () -> getStream(url));
  109.                             data = DataProvidersManager.getInstance().applyAllFilters(data);

  110.                             if (supported.matcher(data.getName()).matches()) {
  111.                                 // visit the current file
  112.                                 try (InputStream input = data.getStreamOpener().openStream()) {
  113.                                     visitor.loadData(input, name);
  114.                                     loaded = true;
  115.                                 }
  116.                             }

  117.                         }
  118.                     }

  119.                 } catch (OrekitException oe) {
  120.                     // maybe the next path component will be able to provide data
  121.                     // wait until all components have been tried
  122.                     delayedException = oe;
  123.                 }
  124.             }

  125.             if (!loaded && delayedException != null) {
  126.                 throw delayedException;
  127.             }

  128.             return loaded;

  129.         } catch (URISyntaxException | IOException | ParseException e) {
  130.             throw new OrekitException(e, new DummyLocalizable(e.getMessage()));
  131.         }

  132.     }

  133.     /** Get the stream to read from the remote URL.
  134.      * @param url url to read from
  135.      * @return stream to read the content of the URL
  136.      * @throws IOException if the URL cannot be opened for reading
  137.      */
  138.     private InputStream getStream(final URL url) throws IOException {
  139.         final URLConnection connection = url.openConnection();
  140.         connection.setConnectTimeout(timeout);
  141.         return connection.getInputStream();
  142.     }

  143. }