XmlLexicalAnalyzer.java

  1. /* Copyright 2002-2025 CS GROUP
  2.  * Licensed to CS GROUP (CS) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * CS licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *   http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.orekit.files.ccsds.utils.lexical;

  18. import java.io.IOException;
  19. import java.io.InputStream;
  20. import java.io.Reader;
  21. import java.util.Collections;
  22. import java.util.HashMap;
  23. import java.util.Map;

  24. import javax.xml.parsers.ParserConfigurationException;
  25. import javax.xml.parsers.SAXParser;
  26. import javax.xml.parsers.SAXParserFactory;

  27. import org.hipparchus.exception.DummyLocalizable;
  28. import org.orekit.data.DataSource;
  29. import org.orekit.errors.OrekitException;
  30. import org.orekit.errors.OrekitMessages;
  31. import org.orekit.files.ccsds.utils.FileFormat;
  32. import org.xml.sax.Attributes;
  33. import org.xml.sax.InputSource;
  34. import org.xml.sax.Locator;
  35. import org.xml.sax.SAXException;
  36. import org.xml.sax.helpers.DefaultHandler;

  37. /** Lexical analyzer for XML CCSDS messages.
  38.  * @author Maxime Journot
  39.  * @author Luc Maisonobe
  40.  * @since 11.0
  41.  */
  42. public class XmlLexicalAnalyzer implements LexicalAnalyzer {

  43.     /** Source providing the data to analyze. */
  44.     private final DataSource source;

  45.     /** Simple constructor.
  46.      * @param source source providing the data to parse
  47.      */
  48.     public XmlLexicalAnalyzer(final DataSource source) {
  49.         this.source = source;
  50.     }

  51.     /** {@inheritDoc} */
  52.     @Override
  53.     public <T> T accept(final MessageParser<T> messageParser) {
  54.         try {
  55.             // Create the handler
  56.             final DefaultHandler handler = new XMLHandler(messageParser);

  57.             // Create the XML SAX parser factory
  58.             final SAXParserFactory factory = SAXParserFactory.newInstance();

  59.             // Build the parser
  60.             final SAXParser saxParser = factory.newSAXParser();

  61.             // Read the xml file
  62.             messageParser.reset(FileFormat.XML);
  63.             final DataSource.Opener opener = source.getOpener();
  64.             if (opener.rawDataIsBinary()) {
  65.                 try (InputStream is = opener.openStreamOnce()) {
  66.                     if (is == null) {
  67.                         throw new OrekitException(OrekitMessages.UNABLE_TO_FIND_FILE, source.getName());
  68.                     }
  69.                     saxParser.parse(new InputSource(is), handler);
  70.                 }
  71.             } else {
  72.                 try (Reader reader = opener.openReaderOnce()) {
  73.                     if (reader == null) {
  74.                         throw new OrekitException(OrekitMessages.UNABLE_TO_FIND_FILE, source.getName());
  75.                     }
  76.                     saxParser.parse(new InputSource(reader), handler);
  77.                 }
  78.             }

  79.             // Get the content of the file
  80.             return messageParser.build();

  81.         } catch (SAXException | ParserConfigurationException | IOException e) {
  82.             // throw caught exception as an OrekitException
  83.             throw new OrekitException(e, new DummyLocalizable(e.getMessage()));
  84.         }
  85.     }

  86.     /** Handler for parsing XML file formats.
  87.      */
  88.     private class XMLHandler extends DefaultHandler {

  89.         /** CCSDS Message parser to use. */
  90.         private final MessageParser<?> messageParser;

  91.         /** Builder for regular elements. */
  92.         private final XmlTokenBuilder regularBuilder;

  93.         /** Builders for special elements. */
  94.         private Map<String, XmlTokenBuilder> specialElements;

  95.         /** Locator used to get current line number. */
  96.         private Locator locator;

  97.         /** Name of the current element. */
  98.         private String currentElementName;

  99.         /** Line number of the current entry. */
  100.         private int currentLineNumber;

  101.         /** Content of the current entry. */
  102.         private String currentContent;

  103.         /** Attributes of the current element. */
  104.         private Map<String, String> currentAttributes;

  105.         /** Last processed token qualified name.
  106.          * @since 12.0
  107.          */
  108.         private String lastQname;

  109.         /** Last processed token start/end indicator.
  110.          * @since 12.0
  111.          */
  112.         private boolean lastWasStart;

  113.         /** Simple constructor.
  114.          * @param messageParser CCSDS Message parser to use
  115.          */
  116.         XMLHandler(final MessageParser<?> messageParser) {
  117.             this.messageParser   = messageParser;
  118.             this.regularBuilder  = new RegularXmlTokenBuilder();
  119.             this.specialElements = messageParser.getSpecialXmlElementsBuilders();
  120.             this.lastQname       = "";
  121.             this.lastWasStart    = false;
  122.         }

  123.         /** Get a builder for the current element.
  124.          * @param qName XML element ualified name
  125.          * @return builder for this element
  126.          */
  127.         private XmlTokenBuilder getBuilder(final String qName) {
  128.             final XmlTokenBuilder specialBuilder = specialElements.get(qName);
  129.             return (specialBuilder != null) ? specialBuilder : regularBuilder;
  130.         }

  131.         /** {@inheritDoc} */
  132.         @Override
  133.         public void setDocumentLocator(final Locator documentLocator) {
  134.             this.locator = documentLocator;
  135.         }

  136.         /** {@inheritDoc} */
  137.         @Override
  138.         public void characters(final char[] ch, final int start, final int length) throws SAXException {
  139.             // we are only interested in leaf elements between one start and one end tag
  140.             // when nested elements occur, this method is called with the spurious whitespace
  141.             // characters (space, tab, end of line) that occur between two successive start
  142.             // tags, two successive end tags, or one end tag and the following start tag of
  143.             // next element at same level.
  144.             // We need to identify the characters we want and the characters we drop.

  145.             // check if we are after a start tag (thus already dropping the characters
  146.             // between and end tag and a following start or end tag)
  147.             if (currentElementName != null) {
  148.                 // we are after a start tag, we don't know yet if the next tag will be
  149.                 // another start tag (in which case we ignore the characters) or if
  150.                 // it is the end tag of a leaf element, so we just store the characters
  151.                 // and will either use them or drop them when this next tag is seen
  152.                 currentLineNumber = locator.getLineNumber();
  153.                 this.currentContent = this.currentContent + new String(ch, start, length);
  154.             }
  155.         }

  156.         /** {@inheritDoc} */
  157.         @Override
  158.         public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) {

  159.             currentElementName = qName;
  160.             currentLineNumber  = locator.getLineNumber();
  161.             currentContent     = "";

  162.             // save attributes in separate map, to avoid overriding during parsing
  163.             if (attributes.getLength() == 0) {
  164.                 currentAttributes  = Collections.emptyMap();
  165.             } else {
  166.                 currentAttributes = new HashMap<>(attributes.getLength());
  167.                 for (int i = 0; i < attributes.getLength(); ++i) {
  168.                     currentAttributes.put(attributes.getQName(i), attributes.getValue(i));
  169.                 }
  170.             }

  171.             for (final ParseToken token : getBuilder(qName).
  172.                                           buildTokens(true, false, qName, getContent(), currentAttributes,
  173.                                                       currentLineNumber, source.getName())) {
  174.                 messageParser.process(token);
  175.             }
  176.             lastQname    = qName;
  177.             lastWasStart = true;

  178.         }

  179.         private String getContent() {
  180.             return currentContent.isEmpty() ? null : currentContent;
  181.         }

  182.         /** {@inheritDoc} */
  183.         @Override
  184.         public void endElement(final String uri, final String localName, final String qName) {

  185.             if (currentContent == null || currentContent.isEmpty()) {
  186.                 // for an end tag without content, we keep the line number of the end tag itself
  187.                 currentLineNumber = locator.getLineNumber();
  188.             }

  189.             // check if we are parsing the end tag of a leaf element
  190.             final boolean isLeaf = lastWasStart && qName.equals(lastQname);

  191.             for (final ParseToken token : getBuilder(qName).
  192.                                           buildTokens(false, isLeaf, qName, getContent(), currentAttributes,
  193.                                                       currentLineNumber, source.getName())) {
  194.                 messageParser.process(token);
  195.             }
  196.             lastQname    = qName;
  197.             lastWasStart = true;

  198.             currentElementName = null;
  199.             currentAttributes  = null;
  200.             currentLineNumber  = -1;
  201.             currentContent     = "";

  202.         }

  203.         /** {@inheritDoc} */
  204.         @Override
  205.         public InputSource resolveEntity(final String publicId, final String systemId) {
  206.             // disable external entities
  207.             return new InputSource();
  208.         }

  209.     }

  210. }