// NetworkCrawler.java

/* Copyright 2002-2013 CS Systèmes d'Information
 * Licensed to CS Systèmes d'Information (CS) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * CS licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.orekit.data;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

import org.apache.commons.math3.exception.util.DummyLocalizable;
import org.orekit.errors.OrekitException;


/** Provider for data files directly fetched from network.
 * <p>
 * This class handles a list of URLs pointing to data files or zip/jar on
 * the net. Since the net is not a tree structure the list elements
 * cannot be top elements recursively browsed as in {@link
 * DirectoryCrawler}, they must be data files or zip/jar archives.
 * </p>
 * <p>
 * The files fetched from network can be locally cached on disk. This prevents
 * too frequent network access if the URLs are remote ones (for example
 * original internet URLs).
 * </p>
 * <p>
 * If the URL points to a remote server (typically on the web) on the other side
 * of a proxy server, you need to configure the networking layer of your
 * application to use the proxy. For a typical authenticating proxy as used in
 * many corporate environments, this can be done as follows using for example
 * the AuthenticatorDialog graphical authenticator class that can be found
 * in the tests directories:
 * <pre>
 *   System.setProperty("http.proxyHost",     "proxy.your.domain.com");
 *   System.setProperty("http.proxyPort",     "8080");
 *   System.setProperty("http.nonProxyHosts", "localhost|*.your.domain.com");
 *   Authenticator.setDefault(new AuthenticatorDialog());
 * </pre>
 * </p>
 * <p>
 * Gzip-compressed files are supported.
 * </p>
 * <p>
 * Zip archives entries are supported recursively.
 * </p>
 * <p>
 * This is a simple application of the <code>visitor</code> design pattern for
 * list browsing.
 * </p>
 * @see DataProvidersManager
 * @author Luc Maisonobe
 */
  71. public class NetworkCrawler implements DataProvider {

  72.     /** URLs list. */
  73.     private final List<URL> urls;

  74.     /** Connection timeout (milliseconds). */
  75.     private int timeout;

  76.     /** Build a data classpath crawler.
  77.      * <p>The default timeout is set to 10 seconds.</p>
  78.      * @param urls list of data file URLs
  79.      */
  80.     public NetworkCrawler(final URL... urls) {

  81.         this.urls = new ArrayList<URL>();
  82.         for (final URL url : urls) {
  83.             this.urls.add(url);
  84.         }

  85.         timeout = 10000;

  86.     }

  87.     /** Set the timeout for connection.
  88.      * @param timeout connection timeout in milliseconds
  89.      */
  90.     public void setTimeout(final int timeout) {
  91.         this.timeout = timeout;
  92.     }

  93.     /** {@inheritDoc} */
  94.     public boolean feed(final Pattern supported, final DataLoader visitor)
  95.         throws OrekitException {

  96.         try {
  97.             OrekitException delayedException = null;
  98.             boolean loaded = false;
  99.             for (URL url : urls) {
  100.                 try {

  101.                     if (visitor.stillAcceptsData()) {
  102.                         final String name     = url.toURI().toString();
  103.                         final String fileName = new File(url.getPath()).getName();
  104.                         if (ZIP_ARCHIVE_PATTERN.matcher(fileName).matches()) {

  105.                             // browse inside the zip/jar file
  106.                             new ZipJarCrawler(url).feed(supported, visitor);
  107.                             loaded = true;

  108.                         } else {

  109.                             // remove suffix from gzip files
  110.                             final Matcher gzipMatcher = GZIP_FILE_PATTERN.matcher(fileName);
  111.                             final String baseName = gzipMatcher.matches() ? gzipMatcher.group(1) : fileName;

  112.                             if (supported.matcher(baseName).matches()) {

  113.                                 final InputStream stream = getStream(url);

  114.                                 // visit the current file
  115.                                 if (gzipMatcher.matches()) {
  116.                                     visitor.loadData(new GZIPInputStream(stream), name);
  117.                                 } else {
  118.                                     visitor.loadData(stream, name);
  119.                                 }

  120.                                 stream.close();
  121.                                 loaded = true;

  122.                             }

  123.                         }
  124.                     }

  125.                 } catch (OrekitException oe) {
  126.                     // maybe the next path component will be able to provide data
  127.                     // wait until all components have been tried
  128.                     delayedException = oe;
  129.                 }
  130.             }

  131.             if (!loaded && delayedException != null) {
  132.                 throw delayedException;
  133.             }

  134.             return loaded;

  135.         } catch (URISyntaxException use) {
  136.             throw new OrekitException(use, new DummyLocalizable(use.getMessage()));
  137.         } catch (IOException ioe) {
  138.             throw new OrekitException(ioe, new DummyLocalizable(ioe.getMessage()));
  139.         } catch (ParseException pe) {
  140.             throw new OrekitException(pe, new DummyLocalizable(pe.getMessage()));
  141.         }

  142.     }

  143.     /** Get the stream to read from the remote URL.
  144.      * @param url url to read from
  145.      * @return stream to read the content of the URL
  146.      * @throws IOException if the URL cannot be opened for reading
  147.      */
  148.     private InputStream getStream(final URL url) throws IOException {
  149.         final URLConnection connection = url.openConnection();
  150.         connection.setConnectTimeout(timeout);
  151.         return connection.getInputStream();
  152.     }

  153. }